summaryrefslogtreecommitdiffstats
path: root/cli
diff options
context:
space:
mode:
authorSachin Pandit <spandit@redhat.com>2014-04-01 09:20:05 +0530
committerKrishnan Parthasarathi <kparthas@redhat.com>2014-06-03 02:02:44 -0700
commit31520d9334f9a40aa329d54651bca7e2c2e5d545 (patch)
treec61d0257b2e4fbb34209d34ccbd979097c4a8391 /cli
parent0fe5ab5b9215b8f0ecfb8bc4ba15a5370850654a (diff)
glusterd/status : First fetch the snapcount and then send the rpc call
for individual snapshots for snapshot status Problem: Initially, all of the calculation was done on the glusterd side; once all the information related to the snapshots had been fetched, it was aggregated into one dictionary and sent back to the CLI. The problem with this approach was that when the number of snapshots is very high, the CLI times out. Solution: First fetch the snapcount and the snapnames from glusterd, then make an individual call for each snapshot using the fetched snapname. This resolves the timeout problem. Change-Id: I32609b3898ed227c804dd4d8ee4516f081240756 BUG: 1087676 Signed-off-by: Sachin Pandit <spandit@redhat.com> Reviewed-on: http://review.gluster.org/7456 Reviewed-by: Avra Sengupta <asengupt@redhat.com> Reviewed-by: Rajesh Joseph <rjoseph@redhat.com> Tested-by: Gluster Build System <jenkins@build.gluster.com> Reviewed-by: Krishnan Parthasarathi <kparthas@redhat.com> Tested-by: Krishnan Parthasarathi <kparthas@redhat.com>
Diffstat (limited to 'cli')
-rw-r--r--cli/src/cli-cmd-parser.c7
-rw-r--r--cli/src/cli-rpc-ops.c278
2 files changed, 221 insertions, 64 deletions
diff --git a/cli/src/cli-cmd-parser.c b/cli/src/cli-cmd-parser.c
index 287943777df..5e619f3cd17 100644
--- a/cli/src/cli-cmd-parser.c
+++ b/cli/src/cli-cmd-parser.c
@@ -3532,12 +3532,13 @@ cli_snap_status_parse (dict_t *dict, const char **words, int wordcount)
out:
if (ret == 0) {
- ret = dict_set_int32 (dict, "cmd", cmd);
+ ret = dict_set_int32 (dict, "status-cmd", cmd);
if (ret) {
gf_log ("cli", GF_LOG_ERROR, "Could not save cmd "
"of snapshot status");
}
}
+
return ret;
}
@@ -3812,7 +3813,9 @@ cli_cmd_snapshot_parse (const char **words, int wordcount, dict_t **options,
} else if (!strcmp (w, "deactivate")) {
type = GF_SNAP_OPTION_TYPE_DEACTIVATE;
}
- if (type != GF_SNAP_OPTION_TYPE_CONFIG) {
+
+ if (type != GF_SNAP_OPTION_TYPE_CONFIG &&
+ type != GF_SNAP_OPTION_TYPE_STATUS) {
ret = dict_set_int32 (dict, "hold_snap_locks", _gf_true);
if (ret) {
gf_log ("cli", GF_LOG_ERROR,
diff --git a/cli/src/cli-rpc-ops.c b/cli/src/cli-rpc-ops.c
index 9312f15dc20..37424c68559 100644
--- a/cli/src/cli-rpc-ops.c
+++ b/cli/src/cli-rpc-ops.c
@@ -82,6 +82,9 @@ char *cli_vol_task_status_str[] = {"not started",
};
int32_t
+gf_cli_snapshot (call_frame_t *frame, xlator_t *this, void *data);
+
+int32_t
gf_cli_get_volume (call_frame_t *frame, xlator_t *this,
void *data);
@@ -7636,14 +7639,13 @@ out:
}
int32_t
-cli_snapshot_remove_reply (gf_cli_rsp *rsp, dict_t *dict, call_frame_t *frame)
+cli_snapshot_remove_reply (gf_cli_rsp *rsp, dict_t *dict)
{
int32_t ret = -1;
char *snap_name = NULL;
GF_ASSERT (rsp);
GF_ASSERT (dict);
- GF_ASSERT (frame);
if (rsp->op_ret) {
cli_err("snapshot delete: failed: %s",
@@ -8361,99 +8363,128 @@ out:
}
int
-cli_snap_status_all (dict_t *dict) {
+cli_populate_req_dict_for_status (dict_t *snap_dict, dict_t *dict, int index) {
int ret = -1;
char key[PATH_MAX] = "";
+ char *buffer = NULL;
+ int type = 0;
int snapcount = 0;
- int i = 0;
+ GF_ASSERT (snap_dict);
GF_ASSERT (dict);
- ret = dict_get_int32 (dict, "status.snapcount", &snapcount);
+ ret = dict_set_uint32 (snap_dict, "status-cmd",
+ GF_SNAP_STATUS_TYPE_SNAP);
if (ret) {
- gf_log ("cli", GF_LOG_ERROR, "Could not get snapcount");
+ gf_log ("cli", GF_LOG_ERROR, "Could not save command "
+ "type in snap dict");
goto out;
}
- if (snapcount == 0) {
- cli_out ("No snapshots present");
+ ret = snprintf (key, sizeof (key), "status.snap%d.snapname", index);
+ if (ret < 0) {
+ goto out;
+ }
+
+ ret = dict_get_str (dict, key, &buffer);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "Could not get snapname");
+ goto out;
}
- for (i = 0 ; i < snapcount; i++) {
- ret = snprintf (key, sizeof (key), "status.snap%d",i);
- if (ret < 0) {
- goto out;
- }
- ret = cli_get_single_snap_status (dict, key);
+ ret = dict_set_str (snap_dict, "snapname", buffer);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "Could not save snapname "
+ "in snap dict");
+ goto out;
+
}
+
+ ret = dict_set_int32 (snap_dict, "type", GF_SNAP_OPTION_TYPE_STATUS);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR,
+ "Could not save command type");
+ goto out;
+ }
+
+ ret = dict_set_dynstr_with_alloc (snap_dict, "cmd-str",
+ "snapshot status");
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR,
+ "Could not save command string as status");
+ goto out;
+ }
+
+ ret = dict_set_int32 (snap_dict, "hold_vol_locks", _gf_false);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR,
+ "Setting volume lock flag failed");
+ goto out;
+ }
+
out:
return ret;
}
-
int
-cli_snapshot_status_display (dict_t *dict, gf_cli_rsp *rsp)
+cli_snapshot_status (dict_t *dict, gf_cli_rsp *rsp,
+ call_frame_t *frame)
{
char key[PATH_MAX] = "";
int ret = -1;
int status_cmd = -1;
+ cli_local_t *local = NULL;
GF_ASSERT (dict);
GF_ASSERT (rsp);
+ GF_ASSERT (frame);
+
+ local = ((call_frame_t *) frame) -> local;
+ if (!local) {
+ gf_log ("cli", GF_LOG_ERROR, "frame->local is NULL");
+ goto out;
+ }
if (rsp->op_ret) {
- cli_err ("Snapshot Status : failed: %s",
- rsp->op_errstr ? rsp->op_errstr :
- "Please check log file for details");
+ if (rsp->op_errstr) {
+ ret = dict_set_dynstr_with_alloc (local->dict,
+ "op_err_str",
+ rsp->op_errstr);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "Failed to set "
+ "op_errstr in local dictionary");
+ goto out;
+ }
+ }
ret = rsp->op_ret;
goto out;
}
- ret = dict_get_int32 (dict, "cmd", &status_cmd);
+ ret = dict_get_int32 (dict, "status-cmd", &status_cmd);
if (ret) {
gf_log ("cli", GF_LOG_ERROR, "Could not fetch status type");
goto out;
}
- switch (status_cmd) {
- case GF_SNAP_STATUS_TYPE_ALL:
- {
- ret = cli_snap_status_all (dict);
- if (ret) {
- gf_log ("cli", GF_LOG_ERROR, "Could not fetch "
- "status of all snap");
- goto out;
- }
- break;
- }
- case GF_SNAP_STATUS_TYPE_SNAP:
- {
- ret = snprintf (key, sizeof (key), "status.snap0");
- if (ret < 0) {
- goto out;
- }
- ret = cli_get_single_snap_status (dict, key);
- if (ret) {
- gf_log ("cli", GF_LOG_ERROR, "Could not fetch "
- "status of snap");
- goto out;
- }
- break;
- }
+ if (status_cmd != GF_SNAP_STATUS_TYPE_SNAP) {
+ dict_copy (dict, local->dict);
+ goto out;
+ }
- case GF_SNAP_STATUS_TYPE_VOL:
- {
- ret = cli_snap_status_all (dict);
- if (ret) {
- gf_log ("cli", GF_LOG_ERROR, "Could not fetch "
- "status of snap in a volume");
- goto out;
- }
- break;
- }
- default:
- break;
+
+ ret = snprintf (key, sizeof (key), "status.snap0");
+ if (ret < 0) {
+ goto out;
}
+
+ ret = cli_get_single_snap_status (dict, key);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "Could not fetch "
+ "status of snap");
+ goto out;
+ }
+
+ ret = 0;
out:
return ret;
}
@@ -8650,7 +8681,7 @@ gf_cli_snapshot_cbk (struct rpc_req *req, struct iovec *iov,
break;
case GF_SNAP_OPTION_TYPE_DELETE:
- ret = cli_snapshot_remove_reply (&rsp, dict, frame);
+ ret = cli_snapshot_remove_reply (&rsp, dict);
if (ret) {
gf_log ("cli", GF_LOG_ERROR,
"Failed to delete snap");
@@ -8659,7 +8690,7 @@ gf_cli_snapshot_cbk (struct rpc_req *req, struct iovec *iov,
break;
case GF_SNAP_OPTION_TYPE_STATUS:
- ret = cli_snapshot_status_display (dict, &rsp);
+ ret = cli_snapshot_status (dict, &rsp, frame);
if (ret) {
gf_log ("cli", GF_LOG_ERROR, "Failed to display "
"snapshot status output.");
@@ -8684,23 +8715,146 @@ out:
}
int32_t
+gf_cli_snapshot_for_status (call_frame_t *frame, xlator_t *this,
+ void *data)
+{
+
+ gf_cli_req req = {{0,}};
+ dict_t *options = NULL;
+ int ret = -1;
+ int32_t cmd = -1;
+ cli_local_t *local = NULL;
+ dict_t *snap_dict = NULL;
+ int snapcount = 0;
+ int i = 0;
+
+ if (!frame || !this || !data)
+ goto out;
+
+ if (frame->local) {
+ local = frame->local;
+ } else {
+ goto out;
+ }
+
+ options = data;
+
+ ret = dict_get_int32 (local->dict, "status-cmd", &cmd);
+
+ if (cmd == GF_SNAP_STATUS_TYPE_ALL ||
+ cmd == GF_SNAP_STATUS_TYPE_VOL) {
+
+ ret = dict_get_int32 (local->dict, "status.snapcount",
+ &snapcount);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "Could not get snapcount");
+ goto out;
+ }
+
+ if (snapcount == 0) {
+ cli_out ("No snapshots present");
+ }
+
+ for (i = 0 ; i < snapcount; i++) {
+ ret = -1;
+
+ snap_dict = dict_new();
+ if (!snap_dict)
+ goto out;
+
+ ret = cli_populate_req_dict_for_status (snap_dict,
+ local->dict, i);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "Could not "
+ "populate snap request dictionary");
+ goto out;
+ }
+
+ ret = cli_to_glusterd (&req, frame,
+ gf_cli_snapshot_cbk,
+ (xdrproc_t) xdr_gf_cli_req, snap_dict,
+ GLUSTER_CLI_SNAP, this, cli_rpc_prog,
+ NULL);
+
+ /* Ignore the return value and error for snapshot
+ * status of type "ALL" or "VOL"
+ *
+ * Scenario : There might be case where status command
+ * and delete command might be issued at the same time.
+ * In that case when status tried to fetch detail of
+ * snap which has been deleted by concurrent command,
+ * then it will show snapshot not present. Which will
+ * not be appropriate.
+ */
+ dict_unref (snap_dict);
+ }
+ }
+out:
+ return ret;
+
+ if (ret && snap_dict)
+ dict_unref (snap_dict);
+}
+
+int32_t
gf_cli_snapshot (call_frame_t *frame, xlator_t *this,
void *data)
{
- gf_cli_req req = {{0,}};
- dict_t *options = NULL;
- int ret = -1;
+ gf_cli_req req = {{0,}};
+ dict_t *options = NULL;
+ int ret = -1;
+ int tmp_ret = -1;
+ cli_local_t *local = NULL;
+ char *err_str = NULL;
+ int type = -1;
if (!frame || !this || !data)
goto out;
+ if (frame->local) {
+ local = frame->local;
+ } else {
+ goto out;
+ }
+
options = data;
+ ret = dict_get_int32 (local->dict, "type", &type);
+
+
ret = cli_to_glusterd (&req, frame, gf_cli_snapshot_cbk,
(xdrproc_t) xdr_gf_cli_req, options,
GLUSTER_CLI_SNAP, this, cli_rpc_prog,
NULL);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "cli_to_glusterd for "
+ "snapshot failed");
+ goto out;
+ }
+
+ if (GF_SNAP_OPTION_TYPE_STATUS == type) {
+ ret = gf_cli_snapshot_for_status (frame, this, data);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "cli to glusterd "
+ "for snapshot status command failed");
+ goto out;
+ }
+ }
+
+ ret = 0;
+
out:
+ if (ret && GF_SNAP_OPTION_TYPE_STATUS == type) {
+ tmp_ret = dict_get_str (local->dict, "op_err_str", &err_str);
+ if (err_str) {
+ cli_err ("Snapshot Status : failed: %s", err_str);
+ dict_del (local->dict, "op_err_str");
+ } else {
+ cli_err ("Snapshot Status : failed: %s", "Please "
+ "check log file for details");
+ }
+ }
+
gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
GF_FREE (req.dict.dict_val);
class='right'>112
-rw-r--r--xlators/cluster/ec/src/ec-fops.h146
-rw-r--r--xlators/cluster/ec/src/ec-galois.c3
-rw-r--r--xlators/cluster/ec/src/ec-generic.c110
-rw-r--r--xlators/cluster/ec/src/ec-heal.c375
-rw-r--r--xlators/cluster/ec/src/ec-heald.c165
-rw-r--r--xlators/cluster/ec/src/ec-heald.h9
-rw-r--r--xlators/cluster/ec/src/ec-helpers.c12
-rw-r--r--xlators/cluster/ec/src/ec-inode-read.c107
-rw-r--r--xlators/cluster/ec/src/ec-inode-write.c199
-rw-r--r--xlators/cluster/ec/src/ec-locks.c110
-rw-r--r--xlators/cluster/ec/src/ec-mem-types.h3
-rw-r--r--xlators/cluster/ec/src/ec-messages.h5
-rw-r--r--xlators/cluster/ec/src/ec-method.h2
-rw-r--r--xlators/cluster/ec/src/ec-types.h40
-rw-r--r--xlators/cluster/ec/src/ec.c288
-rw-r--r--xlators/cluster/ec/src/ec.h1
-rw-r--r--xlators/cluster/stripe/src/Makefile.am22
-rw-r--r--xlators/cluster/stripe/src/stripe-helpers.c658
-rw-r--r--xlators/cluster/stripe/src/stripe-mem-types.h29
-rw-r--r--xlators/cluster/stripe/src/stripe.c5612
-rw-r--r--xlators/cluster/stripe/src/stripe.h291
-rw-r--r--xlators/debug/delay-gen/src/delay-gen-mem-types.h2
-rw-r--r--xlators/debug/delay-gen/src/delay-gen-messages.h2
-rw-r--r--xlators/debug/delay-gen/src/delay-gen.c19
-rw-r--r--xlators/debug/delay-gen/src/delay-gen.h6
-rw-r--r--xlators/debug/error-gen/src/error-gen-mem-types.h2
-rw-r--r--xlators/debug/error-gen/src/error-gen.c86
-rw-r--r--xlators/debug/error-gen/src/error-gen.h1
-rw-r--r--xlators/debug/io-stats/src/io-stats-mem-types.h2
-rw-r--r--xlators/debug/io-stats/src/io-stats.c429
-rw-r--r--xlators/debug/sink/src/sink.c16
-rw-r--r--xlators/debug/trace/src/trace-mem-types.h2
-rw-r--r--xlators/debug/trace/src/trace.c34
-rw-r--r--xlators/debug/trace/src/trace.h16
-rw-r--r--xlators/encryption/Makefile.am3
-rw-r--r--xlators/encryption/crypt/Makefile.am3
-rw-r--r--xlators/encryption/crypt/src/Makefile.am26
-rw-r--r--xlators/encryption/crypt/src/atom.c861
-rw-r--r--xlators/encryption/crypt/src/crypt-common.h133
-rw-r--r--xlators/encryption/crypt/src/crypt-mem-types.h41
-rw-r--r--xlators/encryption/crypt/src/crypt.c3906
-rw-r--r--xlators/encryption/crypt/src/crypt.h931
-rw-r--r--xlators/encryption/crypt/src/data.c715
-rw-r--r--xlators/encryption/crypt/src/keys.c284
-rw-r--r--xlators/encryption/crypt/src/metadata.c575
-rw-r--r--xlators/encryption/crypt/src/metadata.h79
-rw-r--r--xlators/encryption/rot-13/Makefile.am3
-rw-r--r--xlators/experimental/Makefile.am3
-rw-r--r--xlators/experimental/README.md107
-rw-r--r--xlators/experimental/dht2/Makefile.am3
-rw-r--r--xlators/experimental/dht2/README.md47
-rw-r--r--xlators/experimental/dht2/TODO.md3
-rw-r--r--xlators/experimental/dht2/dht2-client/src/Makefile.am21
-rw-r--r--xlators/experimental/dht2/dht2-client/src/dht2-client-main.c58
-rw-r--r--xlators/experimental/dht2/dht2-common/src/dht2-common-map.c19
-rw-r--r--xlators/experimental/dht2/dht2-server/src/Makefile.am23
-rw-r--r--xlators/experimental/dht2/dht2-server/src/dht2-server-main.c58
-rw-r--r--xlators/experimental/fdl/Makefile.am3
-rw-r--r--xlators/experimental/fdl/src/Makefile.am48
-rw-r--r--xlators/experimental/fdl/src/dump-tmpl.c.in177
-rw-r--r--xlators/experimental/fdl/src/fdl-tmpl.c.in513
-rw-r--r--xlators/experimental/fdl/src/fdl.h30
-rwxr-xr-xxlators/experimental/fdl/src/gen_dumper.py117
-rwxr-xr-xxlators/experimental/fdl/src/gen_fdl.py354
-rwxr-xr-xxlators/experimental/fdl/src/gen_recon.py218
-rw-r--r--xlators/experimental/fdl/src/logdump.c51
-rw-r--r--xlators/experimental/fdl/src/recon-tmpl.c.in297
-rw-r--r--xlators/experimental/fdl/src/recon.c89
-rw-r--r--xlators/experimental/jbr-client/Makefile.am3
-rw-r--r--xlators/experimental/jbr-client/src/Makefile.am34
-rw-r--r--xlators/experimental/jbr-client/src/fop-template.c.in102
-rwxr-xr-xxlators/experimental/jbr-client/src/gen-fops.py58
-rw-r--r--xlators/experimental/jbr-client/src/jbr-messages.h30
-rw-r--r--xlators/experimental/jbr-client/src/jbrc.c311
-rw-r--r--xlators/experimental/jbr-client/src/jbrc.h27
-rw-r--r--xlators/experimental/jbr-server/Makefile.am3
-rw-r--r--xlators/experimental/jbr-server/src/Makefile.am39
-rw-r--r--xlators/experimental/jbr-server/src/all-templates.c.in501
-rwxr-xr-xxlators/experimental/jbr-server/src/gen-fops.py181
-rw-r--r--xlators/experimental/jbr-server/src/jbr-internal.h118
-rw-r--r--xlators/experimental/jbr-server/src/jbr.c1676
-rw-r--r--xlators/experimental/posix2/Makefile.am3
-rw-r--r--xlators/experimental/posix2/README.md7
-rw-r--r--xlators/experimental/posix2/TODO.md3
-rw-r--r--xlators/experimental/posix2/common/Makefile.am3
-rw-r--r--xlators/experimental/posix2/common/src/Makefile.am16
-rw-r--r--xlators/experimental/posix2/common/src/posix2-common.c18
-rw-r--r--xlators/experimental/posix2/ds/Makefile.am3
-rw-r--r--xlators/experimental/posix2/ds/src/Makefile.am22
-rw-r--r--xlators/experimental/posix2/ds/src/posix2-ds-main.c56
-rw-r--r--xlators/experimental/posix2/mds/Makefile.am3
-rw-r--r--xlators/experimental/posix2/mds/src/Makefile.am22
-rw-r--r--xlators/experimental/posix2/mds/src/posix2-mds-main.c56
-rw-r--r--xlators/features/Makefile.am6
-rw-r--r--xlators/features/arbiter/src/arbiter-mem-types.h2
-rw-r--r--xlators/features/arbiter/src/arbiter.c19
-rw-r--r--xlators/features/arbiter/src/arbiter.h4
-rw-r--r--xlators/features/barrier/src/barrier-mem-types.h2
-rw-r--r--xlators/features/barrier/src/barrier.c100
-rw-r--r--xlators/features/barrier/src/barrier.h9
-rw-r--r--xlators/features/bit-rot/src/bitd/bit-rot-bitd-messages.h53
-rw-r--r--xlators/features/bit-rot/src/bitd/bit-rot-scrub-status.c12
-rw-r--r--xlators/features/bit-rot/src/bitd/bit-rot-scrub-status.h20
-rw-r--r--xlators/features/bit-rot/src/bitd/bit-rot-scrub.c98
-rw-r--r--xlators/features/bit-rot/src/bitd/bit-rot-scrub.h2
-rw-r--r--xlators/features/bit-rot/src/bitd/bit-rot-ssm.h2
-rw-r--r--xlators/features/bit-rot/src/bitd/bit-rot.c320
-rw-r--r--xlators/features/bit-rot/src/bitd/bit-rot.h36
-rw-r--r--xlators/features/bit-rot/src/stub/bit-rot-common.h2
-rw-r--r--xlators/features/bit-rot/src/stub/bit-rot-stub-helpers.c97
-rw-r--r--xlators/features/bit-rot/src/stub/bit-rot-stub-mem-types.h3
-rw-r--r--xlators/features/bit-rot/src/stub/bit-rot-stub-messages.h77
-rw-r--r--xlators/features/bit-rot/src/stub/bit-rot-stub.c469
-rw-r--r--xlators/features/bit-rot/src/stub/bit-rot-stub.h54
-rw-r--r--xlators/features/changelog/lib/examples/python/libgfchangelog.py4
-rw-r--r--xlators/features/changelog/lib/src/Makefile.am2
-rw-r--r--xlators/features/changelog/lib/src/changelog-lib-messages.h32
-rw-r--r--xlators/features/changelog/lib/src/gf-changelog-api.c12
-rw-r--r--xlators/features/changelog/lib/src/gf-changelog-helpers.c59
-rw-r--r--xlators/features/changelog/lib/src/gf-changelog-helpers.h7
-rw-r--r--xlators/features/changelog/lib/src/gf-changelog-journal-handler.c35
-rw-r--r--xlators/features/changelog/lib/src/gf-changelog-reborp.c34
-rw-r--r--xlators/features/changelog/lib/src/gf-changelog-rpc.h2
-rw-r--r--xlators/features/changelog/lib/src/gf-changelog.c22
-rw-r--r--xlators/features/changelog/lib/src/gf-history-changelog.c83
-rw-r--r--xlators/features/changelog/src/changelog-barrier.c17
-rw-r--r--xlators/features/changelog/src/changelog-encoders.h4
-rw-r--r--xlators/features/changelog/src/changelog-ev-handle.c23
-rw-r--r--xlators/features/changelog/src/changelog-ev-handle.h8
-rw-r--r--xlators/features/changelog/src/changelog-helpers.c316
-rw-r--r--xlators/features/changelog/src/changelog-helpers.h65
-rw-r--r--xlators/features/changelog/src/changelog-mem-types.h2
-rw-r--r--xlators/features/changelog/src/changelog-messages.h125
-rw-r--r--xlators/features/changelog/src/changelog-misc.h4
-rw-r--r--xlators/features/changelog/src/changelog-rpc-common.c73
-rw-r--r--xlators/features/changelog/src/changelog-rpc-common.h4
-rw-r--r--xlators/features/changelog/src/changelog-rpc.c185
-rw-r--r--xlators/features/changelog/src/changelog-rpc.h2
-rw-r--r--xlators/features/changelog/src/changelog-rt.c6
-rw-r--r--xlators/features/changelog/src/changelog-rt.h4
-rw-r--r--xlators/features/changelog/src/changelog.c373
-rw-r--r--xlators/features/changetimerecorder/Makefile.am3
-rw-r--r--xlators/features/changetimerecorder/src/Makefile.am26
-rw-r--r--xlators/features/changetimerecorder/src/changetimerecorder.c2357
-rw-r--r--xlators/features/changetimerecorder/src/changetimerecorder.h21
-rw-r--r--xlators/features/changetimerecorder/src/ctr-helper.c293
-rw-r--r--xlators/features/changetimerecorder/src/ctr-helper.h854
-rw-r--r--xlators/features/changetimerecorder/src/ctr-messages.h61
-rw-r--r--xlators/features/changetimerecorder/src/ctr-xlator-ctx.c362
-rw-r--r--xlators/features/changetimerecorder/src/ctr-xlator-ctx.h68
-rw-r--r--xlators/features/changetimerecorder/src/ctr_mem_types.h22
-rw-r--r--xlators/features/cloudsync/src/Makefile.am4
-rw-r--r--xlators/features/cloudsync/src/cloudsync-autogen-fops-tmpl.c8
-rw-r--r--xlators/features/cloudsync/src/cloudsync-autogen-fops-tmpl.h2
-rw-r--r--xlators/features/cloudsync/src/cloudsync-common.c16
-rw-r--r--xlators/features/cloudsync/src/cloudsync-common.h43
-rwxr-xr-xxlators/features/cloudsync/src/cloudsync-fops-c.py38
-rw-r--r--xlators/features/cloudsync/src/cloudsync-mem-types.h3
-rw-r--r--xlators/features/cloudsync/src/cloudsync-plugins/src/Makefile.am6
-rw-r--r--xlators/features/cloudsync/src/cloudsync-plugins/src/cloudsyncs3/src/libcloudsyncs3-mem-types.h2
-rw-r--r--xlators/features/cloudsync/src/cloudsync-plugins/src/cloudsyncs3/src/libcloudsyncs3.c6
-rw-r--r--xlators/features/cloudsync/src/cloudsync-plugins/src/cloudsyncs3/src/libcloudsyncs3.h8
-rw-r--r--xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/Makefile.am (renamed from xlators/experimental/dht2/dht2-client/Makefile.am)0
-rw-r--r--xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/Makefile.am12
-rw-r--r--xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/archivestore.h203
-rw-r--r--xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/cvlt-messages.h30
-rw-r--r--xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/libcloudsynccvlt.sym1
-rw-r--r--xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/libcvlt-mem-types.h19
-rw-r--r--xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/libcvlt.c842
-rw-r--r--xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/libcvlt.h84
-rw-r--r--xlators/features/cloudsync/src/cloudsync.c575
-rw-r--r--xlators/features/cloudsync/src/cloudsync.h32
-rw-r--r--xlators/features/compress/src/cdc-helper.c6
-rw-r--r--xlators/features/compress/src/cdc-mem-types.h2
-rw-r--r--xlators/features/compress/src/cdc.c18
-rw-r--r--xlators/features/compress/src/cdc.h2
-rw-r--r--xlators/features/gfid-access/src/gfid-access-mem-types.h2
-rw-r--r--xlators/features/gfid-access/src/gfid-access.c40
-rw-r--r--xlators/features/gfid-access/src/gfid-access.h10
-rw-r--r--xlators/features/glupy/Makefile.am3
-rw-r--r--xlators/features/glupy/doc/README.md44
-rw-r--r--xlators/features/glupy/doc/TESTING9
-rw-r--r--xlators/features/glupy/doc/test.vol10
-rw-r--r--xlators/features/glupy/examples/Makefile.am5
-rw-r--r--xlators/features/glupy/examples/debug-trace.py777
-rw-r--r--xlators/features/glupy/examples/helloworld.py21
-rw-r--r--xlators/features/glupy/examples/negative.py93
-rw-r--r--xlators/features/glupy/src/Makefile.am36
-rw-r--r--xlators/features/glupy/src/__init__.py.in2
-rw-r--r--xlators/features/glupy/src/glupy.c2446
-rw-r--r--xlators/features/glupy/src/glupy.h56
-rw-r--r--xlators/features/glupy/src/glupy.sym101
-rw-r--r--xlators/features/glupy/src/glupy/Makefile.am5
-rw-r--r--xlators/features/glupy/src/glupy/__init__.py852
-rw-r--r--xlators/features/glupy/src/setup.py.in24
-rw-r--r--xlators/features/index/src/index-mem-types.h12
-rw-r--r--xlators/features/index/src/index-messages.h2
-rw-r--r--xlators/features/index/src/index.c39
-rw-r--r--xlators/features/index/src/index.h10
-rw-r--r--xlators/features/leases/src/leases-internal.c84
-rw-r--r--xlators/features/leases/src/leases-mem-types.h5
-rw-r--r--xlators/features/leases/src/leases-messages.h2
-rw-r--r--xlators/features/leases/src/leases.c43
-rw-r--r--xlators/features/leases/src/leases.h64
-rw-r--r--xlators/features/locks/src/clear.c58
-rw-r--r--xlators/features/locks/src/clear.h8
-rw-r--r--xlators/features/locks/src/common.c578
-rw-r--r--xlators/features/locks/src/common.h87
-rw-r--r--xlators/features/locks/src/entrylk.c106
-rw-r--r--xlators/features/locks/src/inodelk.c249
-rw-r--r--xlators/features/locks/src/locks-mem-types.h3
-rw-r--r--xlators/features/locks/src/locks.h105
-rw-r--r--xlators/features/locks/src/pl-messages.h2
-rw-r--r--xlators/features/locks/src/posix.c1421
-rw-r--r--xlators/features/locks/src/reservelk.c78
-rw-r--r--xlators/features/locks/tests/unit-test.c12
-rw-r--r--xlators/features/marker/src/marker-common.c7
-rw-r--r--xlators/features/marker/src/marker-common.h4
-rw-r--r--xlators/features/marker/src/marker-mem-types.h3
-rw-r--r--xlators/features/marker/src/marker-quota-helper.c95
-rw-r--r--xlators/features/marker/src/marker-quota-helper.h12
-rw-r--r--xlators/features/marker/src/marker-quota.c92
-rw-r--r--xlators/features/marker/src/marker-quota.h17
-rw-r--r--xlators/features/marker/src/marker.c48
-rw-r--r--xlators/features/marker/src/marker.h8
-rw-r--r--xlators/features/metadisp/Makefile.am (renamed from xlators/experimental/dht2/dht2-server/Makefile.am)0
-rw-r--r--xlators/features/metadisp/src/Makefile.am38
-rw-r--r--xlators/features/metadisp/src/backend.c45
-rw-r--r--xlators/features/metadisp/src/fops-tmpl.c10
-rw-r--r--xlators/features/metadisp/src/gen-fops.py160
-rw-r--r--xlators/features/metadisp/src/metadisp-create.c101
-rw-r--r--xlators/features/metadisp/src/metadisp-fops.h51
-rw-r--r--xlators/features/metadisp/src/metadisp-fsync.c54
-rw-r--r--xlators/features/metadisp/src/metadisp-lookup.c90
-rw-r--r--xlators/features/metadisp/src/metadisp-open.c70
-rw-r--r--xlators/features/metadisp/src/metadisp-readdir.c65
-rw-r--r--xlators/features/metadisp/src/metadisp-setattr.c90
-rw-r--r--xlators/features/metadisp/src/metadisp-stat.c124
-rw-r--r--xlators/features/metadisp/src/metadisp-unlink.c160
-rw-r--r--xlators/features/metadisp/src/metadisp.c46
-rw-r--r--xlators/features/metadisp/src/metadisp.h45
-rw-r--r--xlators/features/namespace/src/namespace.c20
-rw-r--r--xlators/features/namespace/src/namespace.h4
-rw-r--r--xlators/features/quiesce/src/quiesce-mem-types.h2
-rw-r--r--xlators/features/quiesce/src/quiesce-messages.h2
-rw-r--r--xlators/features/quiesce/src/quiesce.c64
-rw-r--r--xlators/features/quiesce/src/quiesce.h4
-rw-r--r--xlators/features/quota/src/Makefile.am5
-rw-r--r--xlators/features/quota/src/quota-enforcer-client.c20
-rw-r--r--xlators/features/quota/src/quota-mem-types.h3
-rw-r--r--xlators/features/quota/src/quota-messages.h2
-rw-r--r--xlators/features/quota/src/quota.c268
-rw-r--r--xlators/features/quota/src/quota.h29
-rw-r--r--xlators/features/quota/src/quotad-aggregator.c89
-rw-r--r--xlators/features/quota/src/quotad-aggregator.h10
-rw-r--r--xlators/features/quota/src/quotad-helpers.c6
-rw-r--r--xlators/features/quota/src/quotad.c38
-rw-r--r--xlators/features/quota/src/quotad.sym7
-rw-r--r--xlators/features/read-only/src/read-only-common.c2
-rw-r--r--xlators/features/read-only/src/read-only-common.h4
-rw-r--r--xlators/features/read-only/src/read-only-mem-types.h2
-rw-r--r--xlators/features/read-only/src/read-only.c14
-rw-r--r--xlators/features/read-only/src/read-only.h15
-rw-r--r--xlators/features/read-only/src/worm-helper.c24
-rw-r--r--xlators/features/read-only/src/worm.c144
-rw-r--r--xlators/features/sdfs/src/sdfs-messages.h2
-rw-r--r--xlators/features/sdfs/src/sdfs.c29
-rw-r--r--xlators/features/sdfs/src/sdfs.h6
-rw-r--r--xlators/features/selinux/src/selinux-mem-types.h2
-rw-r--r--xlators/features/selinux/src/selinux-messages.h2
-rw-r--r--xlators/features/selinux/src/selinux.c25
-rw-r--r--xlators/features/selinux/src/selinux.h2
-rw-r--r--xlators/features/shard/src/shard-mem-types.h2
-rw-r--r--xlators/features/shard/src/shard-messages.h2
-rw-r--r--xlators/features/shard/src/shard.c914
-rw-r--r--xlators/features/shard/src/shard.h26
-rw-r--r--xlators/features/snapview-client/src/snapview-client-mem-types.h2
-rw-r--r--xlators/features/snapview-client/src/snapview-client-messages.h39
-rw-r--r--xlators/features/snapview-client/src/snapview-client.c726
-rw-r--r--xlators/features/snapview-client/src/snapview-client.h15
-rw-r--r--xlators/features/snapview-server/src/snapview-server-helpers.c46
-rw-r--r--xlators/features/snapview-server/src/snapview-server-mem-types.h2
-rw-r--r--xlators/features/snapview-server/src/snapview-server-messages.h4
-rw-r--r--xlators/features/snapview-server/src/snapview-server-mgmt.c45
-rw-r--r--xlators/features/snapview-server/src/snapview-server.c49
-rw-r--r--xlators/features/snapview-server/src/snapview-server.h35
-rw-r--r--xlators/features/thin-arbiter/src/Makefile.am2
-rw-r--r--xlators/features/thin-arbiter/src/thin-arbiter-mem-types.h2
-rw-r--r--xlators/features/thin-arbiter/src/thin-arbiter-messages.h2
-rw-r--r--xlators/features/thin-arbiter/src/thin-arbiter.c23
-rw-r--r--xlators/features/thin-arbiter/src/thin-arbiter.h12
-rw-r--r--xlators/features/trash/src/trash-mem-types.h2
-rw-r--r--xlators/features/trash/src/trash.c23
-rw-r--r--xlators/features/trash/src/trash.h10
-rw-r--r--xlators/features/upcall/src/upcall-cache-invalidation.h6
-rw-r--r--xlators/features/upcall/src/upcall-internal.c207
-rw-r--r--xlators/features/upcall/src/upcall-mem-types.h2
-rw-r--r--xlators/features/upcall/src/upcall-messages.h2
-rw-r--r--xlators/features/upcall/src/upcall.c142
-rw-r--r--xlators/features/upcall/src/upcall.h34
-rw-r--r--xlators/features/utime/src/utime-autogen-fops-tmpl.c10
-rw-r--r--xlators/features/utime/src/utime-autogen-fops-tmpl.h2
-rwxr-xr-xxlators/features/utime/src/utime-gen-fops-c.py28
-rwxr-xr-xxlators/features/utime/src/utime-gen-fops-h.py2
-rw-r--r--xlators/features/utime/src/utime-helpers.c11
-rw-r--r--xlators/features/utime/src/utime-helpers.h7
-rw-r--r--xlators/features/utime/src/utime-mem-types.h2
-rw-r--r--xlators/features/utime/src/utime-messages.h5
-rw-r--r--xlators/features/utime/src/utime.c186
-rw-r--r--xlators/features/utime/src/utime.h6
-rw-r--r--xlators/lib/src/libxlator.c15
-rw-r--r--xlators/lib/src/libxlator.h14
-rw-r--r--xlators/meta/src/active-link.c4
-rw-r--r--xlators/meta/src/cmdline-file.c8
-rw-r--r--xlators/meta/src/frames-file.c11
-rw-r--r--xlators/meta/src/graph-dir.c4
-rw-r--r--xlators/meta/src/graphs-dir.c4
-rw-r--r--xlators/meta/src/history-file.c8
-rw-r--r--xlators/meta/src/logfile-link.c4
-rw-r--r--xlators/meta/src/logging-dir.c4
-rw-r--r--xlators/meta/src/loglevel-file.c6
-rw-r--r--xlators/meta/src/mallinfo-file.c6
-rw-r--r--xlators/meta/src/measure-file.c6
-rw-r--r--xlators/meta/src/meminfo-file.c8
-rw-r--r--xlators/meta/src/meta-defaults.c12
-rw-r--r--xlators/meta/src/meta-helpers.c13
-rw-r--r--xlators/meta/src/meta-hooks.h2
-rw-r--r--xlators/meta/src/meta-mem-types.h2
-rw-r--r--xlators/meta/src/meta.c20
-rw-r--r--xlators/meta/src/meta.h2
-rw-r--r--xlators/meta/src/name-file.c8
-rw-r--r--xlators/meta/src/option-file.c4
-rw-r--r--xlators/meta/src/options-dir.c4
-rw-r--r--xlators/meta/src/private-file.c8
-rw-r--r--xlators/meta/src/process_uuid-file.c8
-rw-r--r--xlators/meta/src/profile-file.c8
-rw-r--r--xlators/meta/src/root-dir.c4
-rw-r--r--xlators/meta/src/subvolume-link.c4
-rw-r--r--xlators/meta/src/subvolumes-dir.c4
-rw-r--r--xlators/meta/src/top-link.c4
-rw-r--r--xlators/meta/src/type-file.c8
-rw-r--r--xlators/meta/src/version-file.c8
-rw-r--r--xlators/meta/src/view-dir.c4
-rw-r--r--xlators/meta/src/volfile-file.c6
-rw-r--r--xlators/meta/src/xlator-dir.c4
-rw-r--r--xlators/mgmt/glusterd/src/Makefile.am29
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-bitd-svc.c6
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-bitrot.c97
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-brick-ops.c1121
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-conn-mgmt.c71
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-conn-mgmt.h8
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-errno.h2
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-ganesha.c927
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-geo-rep.c295
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-geo-rep.h2
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-gfproxyd-svc-helper.c7
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-gfproxyd-svc.c20
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-gfproxyd-svc.h2
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-handler.c1237
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-handshake.c385
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-hooks.c126
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-hooks.h4
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-locks.c118
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-locks.h3
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-log-ops.c24
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-mem-types.h101
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-messages.h158
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-mgmt-handler.c247
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-mgmt.c896
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-mgmt.h15
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-mountbroker.c69
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-mountbroker.h6
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-nfs-svc.c9
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-nfs-svc.h2
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-op-sm.c1398
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-op-sm.h29
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-peer-utils.c346
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-peer-utils.h9
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-pmap.c40
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-pmap.h16
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-proc-mgmt.c10
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-quota.c96
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-quotad-svc.c8
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-rcu.h2
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-rebalance.c634
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-replace-brick.c40
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-reset-brick.c26
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-rpc-ops.c190
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-scrub-svc.c8
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-server-quorum.c20
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-shd-svc-helper.c153
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-shd-svc-helper.h42
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-shd-svc.c674
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-shd-svc.h17
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-sm.c275
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-sm.h13
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-snapd-svc.c29
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-snapd-svc.h2
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-snapshot-utils.c376
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-snapshot-utils.h10
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-snapshot.c409
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-statedump.c10
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-statedump.h2
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-store.c1681
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-store.h58
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-svc-helper.c836
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-svc-helper.h43
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-svc-mgmt.c284
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-svc-mgmt.h46
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-syncop.c199
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-syncop.h4
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-tier.c1378
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-tierd-svc-helper.c9
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-tierd-svc-helper.h37
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-tierd-svc.c503
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-tierd-svc.h41
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-utils.c3387
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-utils.h106
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-volgen.c1966
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-volgen.h34
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-volume-ops.c906
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-volume-set.c369
-rw-r--r--xlators/mgmt/glusterd/src/glusterd.c395
-rw-r--r--xlators/mgmt/glusterd/src/glusterd.h507
-rw-r--r--xlators/mount/fuse/src/fuse-bridge.c904
-rw-r--r--xlators/mount/fuse/src/fuse-bridge.h116
-rw-r--r--xlators/mount/fuse/src/fuse-helpers.c111
-rw-r--r--xlators/mount/fuse/src/fuse-mem-types.h2
-rwxr-xr-xxlators/mount/fuse/utils/mount.glusterfs.in69
-rwxr-xr-xxlators/mount/fuse/utils/mount_glusterfs.in19
-rw-r--r--xlators/nfs/server/src/acl3.c46
-rw-r--r--xlators/nfs/server/src/acl3.h4
-rw-r--r--xlators/nfs/server/src/auth-cache.c15
-rw-r--r--xlators/nfs/server/src/auth-cache.h2
-rw-r--r--xlators/nfs/server/src/exports.c16
-rw-r--r--xlators/nfs/server/src/exports.h2
-rw-r--r--xlators/nfs/server/src/mount3-auth.c2
-rw-r--r--xlators/nfs/server/src/mount3.c88
-rw-r--r--xlators/nfs/server/src/mount3.h12
-rw-r--r--xlators/nfs/server/src/mount3udp_svc.c10
-rw-r--r--xlators/nfs/server/src/netgroups.c2
-rw-r--r--xlators/nfs/server/src/netgroups.h2
-rw-r--r--xlators/nfs/server/src/nfs-common.c17
-rw-r--r--xlators/nfs/server/src/nfs-common.h6
-rw-r--r--xlators/nfs/server/src/nfs-fops.c10
-rw-r--r--xlators/nfs/server/src/nfs-fops.h8
-rw-r--r--xlators/nfs/server/src/nfs-generics.c2
-rw-r--r--xlators/nfs/server/src/nfs-generics.h2
-rw-r--r--xlators/nfs/server/src/nfs-inodes.c2
-rw-r--r--xlators/nfs/server/src/nfs-inodes.h8
-rw-r--r--xlators/nfs/server/src/nfs-mem-types.h9
-rw-r--r--xlators/nfs/server/src/nfs-messages.h2
-rw-r--r--xlators/nfs/server/src/nfs.c71
-rw-r--r--xlators/nfs/server/src/nfs.h8
-rw-r--r--xlators/nfs/server/src/nfs3-fh.c16
-rw-r--r--xlators/nfs/server/src/nfs3-fh.h6
-rw-r--r--xlators/nfs/server/src/nfs3-helpers.c18
-rw-r--r--xlators/nfs/server/src/nfs3-helpers.h2
-rw-r--r--xlators/nfs/server/src/nfs3.c73
-rw-r--r--xlators/nfs/server/src/nfs3.h10
-rw-r--r--xlators/nfs/server/src/nfsserver.sym10
-rw-r--r--xlators/nfs/server/src/nlm4.c122
-rw-r--r--xlators/nfs/server/src/nlm4.h14
-rw-r--r--xlators/nfs/server/src/nlmcbk_svc.c7
-rw-r--r--xlators/performance/Makefile.am2
-rw-r--r--xlators/performance/decompounder/Makefile.am1
-rw-r--r--xlators/performance/decompounder/src/Makefile.am19
-rw-r--r--xlators/performance/decompounder/src/decompounder-mem-types.h17
-rw-r--r--xlators/performance/decompounder/src/decompounder-messages.h28
-rw-r--r--xlators/performance/decompounder/src/decompounder.c833
-rw-r--r--xlators/performance/decompounder/src/decompounder.h78
-rw-r--r--xlators/performance/io-cache/src/io-cache-messages.h41
-rw-r--r--xlators/performance/io-cache/src/io-cache.c292
-rw-r--r--xlators/performance/io-cache/src/io-cache.h45
-rw-r--r--xlators/performance/io-cache/src/ioc-inode.c14
-rw-r--r--xlators/performance/io-cache/src/ioc-mem-types.h2
-rw-r--r--xlators/performance/io-cache/src/page.c67
-rw-r--r--xlators/performance/io-threads/src/io-threads-messages.h16
-rw-r--r--xlators/performance/io-threads/src/io-threads.c158
-rw-r--r--xlators/performance/io-threads/src/io-threads.h18
-rw-r--r--xlators/performance/io-threads/src/iot-mem-types.h2
-rw-r--r--xlators/performance/md-cache/src/md-cache-mem-types.h2
-rw-r--r--xlators/performance/md-cache/src/md-cache-messages.h2
-rw-r--r--xlators/performance/md-cache/src/md-cache.c618
-rw-r--r--xlators/performance/nl-cache/src/nl-cache-helper.c59
-rw-r--r--xlators/performance/nl-cache/src/nl-cache-mem-types.h5
-rw-r--r--xlators/performance/nl-cache/src/nl-cache-messages.h2
-rw-r--r--xlators/performance/nl-cache/src/nl-cache.c22
-rw-r--r--xlators/performance/nl-cache/src/nl-cache.h10
-rw-r--r--xlators/performance/open-behind/src/open-behind-mem-types.h2
-rw-r--r--xlators/performance/open-behind/src/open-behind-messages.h8
-rw-r--r--xlators/performance/open-behind/src/open-behind.c1374
-rw-r--r--xlators/performance/quick-read/src/quick-read-mem-types.h6
-rw-r--r--xlators/performance/quick-read/src/quick-read-messages.h2
-rw-r--r--xlators/performance/quick-read/src/quick-read.c64
-rw-r--r--xlators/performance/quick-read/src/quick-read.h22
-rw-r--r--xlators/performance/read-ahead/src/page.c18
-rw-r--r--xlators/performance/read-ahead/src/read-ahead-mem-types.h2
-rw-r--r--xlators/performance/read-ahead/src/read-ahead-messages.h2
-rw-r--r--xlators/performance/read-ahead/src/read-ahead.c41
-rw-r--r--xlators/performance/read-ahead/src/read-ahead.h10
-rw-r--r--xlators/performance/readdir-ahead/src/readdir-ahead-mem-types.h2
-rw-r--r--xlators/performance/readdir-ahead/src/readdir-ahead-messages.h2
-rw-r--r--xlators/performance/readdir-ahead/src/readdir-ahead.c181
-rw-r--r--xlators/performance/readdir-ahead/src/readdir-ahead.h2
-rw-r--r--xlators/performance/symlink-cache/Makefile.am3
-rw-r--r--xlators/performance/symlink-cache/src/Makefile.am16
-rw-r--r--xlators/performance/symlink-cache/src/symlink-cache-messages.h30
-rw-r--r--xlators/performance/symlink-cache/src/symlink-cache.c360
-rw-r--r--xlators/performance/write-behind/src/write-behind-mem-types.h2
-rw-r--r--xlators/performance/write-behind/src/write-behind-messages.h2
-rw-r--r--xlators/performance/write-behind/src/write-behind.c134
-rw-r--r--xlators/playground/rot-13/Makefile.am (renamed from xlators/cluster/stripe/Makefile.am)0
-rw-r--r--xlators/playground/rot-13/src/Makefile.am (renamed from xlators/encryption/rot-13/src/Makefile.am)0
-rw-r--r--xlators/playground/rot-13/src/rot-13.c (renamed from xlators/encryption/rot-13/src/rot-13.c)6
-rw-r--r--xlators/playground/rot-13/src/rot-13.h (renamed from xlators/encryption/rot-13/src/rot-13.h)0
-rw-r--r--xlators/playground/template/src/template.c2
-rw-r--r--xlators/playground/template/src/template.h14
-rw-r--r--xlators/protocol/auth/addr/src/addr.c18
-rw-r--r--xlators/protocol/client/src/client-callback.c91
-rw-r--r--xlators/protocol/client/src/client-common.c66
-rw-r--r--xlators/protocol/client/src/client-common.h10
-rw-r--r--xlators/protocol/client/src/client-handshake.c762
-rw-r--r--xlators/protocol/client/src/client-helpers.c2360
-rw-r--r--xlators/protocol/client/src/client-lk.c47
-rw-r--r--xlators/protocol/client/src/client-mem-types.h3
-rw-r--r--xlators/protocol/client/src/client-messages.h125
-rw-r--r--xlators/protocol/client/src/client-rpc-fops.c1137
-rw-r--r--xlators/protocol/client/src/client-rpc-fops_v2.c1276
-rw-r--r--xlators/protocol/client/src/client.c1356
-rw-r--r--xlators/protocol/client/src/client.h222
-rw-r--r--xlators/protocol/server/src/Makefile.am4
-rw-r--r--xlators/protocol/server/src/authenticate.h8
-rw-r--r--xlators/protocol/server/src/server-common.c53
-rw-r--r--xlators/protocol/server/src/server-common.h11
-rw-r--r--xlators/protocol/server/src/server-handshake.c133
-rw-r--r--xlators/protocol/server/src/server-helpers.c4345
-rw-r--r--xlators/protocol/server/src/server-helpers.h35
-rw-r--r--xlators/protocol/server/src/server-mem-types.h8
-rw-r--r--xlators/protocol/server/src/server-messages.h181
-rw-r--r--xlators/protocol/server/src/server-resolve.c39
-rw-r--r--xlators/protocol/server/src/server-rpc-fops.c404
-rw-r--r--xlators/protocol/server/src/server-rpc-fops_v2.c1650
-rw-r--r--xlators/protocol/server/src/server.c340
-rw-r--r--xlators/protocol/server/src/server.h88
-rw-r--r--xlators/storage/bd/Makefile.am3
-rw-r--r--xlators/storage/bd/src/Makefile.am21
-rw-r--r--xlators/storage/bd/src/bd-aio.c518
-rw-r--r--xlators/storage/bd/src/bd-aio.h40
-rw-r--r--xlators/storage/bd/src/bd-helper.c1073
-rw-r--r--xlators/storage/bd/src/bd-mem-types.h26
-rw-r--r--xlators/storage/bd/src/bd.c2426
-rw-r--r--xlators/storage/bd/src/bd.h189
-rw-r--r--xlators/storage/posix/src/Makefile.am2
-rw-r--r--xlators/storage/posix/src/posix-aio.c14
-rw-r--r--xlators/storage/posix/src/posix-aio.h3
-rw-r--r--xlators/storage/posix/src/posix-common.c341
-rw-r--r--xlators/storage/posix/src/posix-entry-ops.c445
-rw-r--r--xlators/storage/posix/src/posix-gfid-path.c116
-rw-r--r--xlators/storage/posix/src/posix-gfid-path.h13
-rw-r--r--xlators/storage/posix/src/posix-handle.c209
-rw-r--r--xlators/storage/posix/src/posix-handle.h33
-rw-r--r--xlators/storage/posix/src/posix-helpers.c902
-rw-r--r--xlators/storage/posix/src/posix-inode-fd-ops.c1040
-rw-r--r--xlators/storage/posix/src/posix-inode-handle.h20
-rw-r--r--xlators/storage/posix/src/posix-mem-types.h7
-rw-r--r--xlators/storage/posix/src/posix-messages.h6
-rw-r--r--xlators/storage/posix/src/posix-metadata.c561
-rw-r--r--xlators/storage/posix/src/posix-metadata.h25
-rw-r--r--xlators/storage/posix/src/posix.c34
-rw-r--r--xlators/storage/posix/src/posix.h181
-rw-r--r--xlators/system/posix-acl/src/posix-acl-mem-types.h2
-rw-r--r--xlators/system/posix-acl/src/posix-acl-messages.h2
-rw-r--r--xlators/system/posix-acl/src/posix-acl-xattr.h6
-rw-r--r--xlators/system/posix-acl/src/posix-acl.c222
-rw-r--r--xlators/system/posix-acl/src/posix-acl.h7
-rw-r--r--xlators/xlator.sym10
641 files changed, 41272 insertions, 73694 deletions
diff --git a/xlators/Makefile.am b/xlators/Makefile.am
index 29549db724e..ef20cbb64fa 100644
--- a/xlators/Makefile.am
+++ b/xlators/Makefile.am
@@ -2,10 +2,10 @@ if BUILD_GNFS
GNFS_DIR = nfs
endif
-DIST_SUBDIRS = cluster storage protocol performance debug features encryption \
+DIST_SUBDIRS = cluster storage protocol performance debug features \
mount nfs mgmt system playground meta
-SUBDIRS = cluster storage protocol performance debug features encryption \
+SUBDIRS = cluster storage protocol performance debug features \
mount ${GNFS_DIR} mgmt system playground meta
EXTRA_DIST = xlator.sym
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c
index 231a7970928..032ab5c8001 100644
--- a/xlators/cluster/afr/src/afr-common.c
+++ b/xlators/cluster/afr/src/afr-common.c
@@ -15,22 +15,20 @@
#include <stdlib.h>
#include <signal.h>
-#include "glusterfs.h"
+#include <glusterfs/glusterfs.h>
#include "afr.h"
-#include "dict.h"
-#include "xlator.h"
-#include "hashfn.h"
-#include "logging.h"
-#include "list.h"
-#include "call-stub.h"
-#include "defaults.h"
-#include "common-utils.h"
-#include "compat-errno.h"
-#include "compat.h"
-#include "byte-order.h"
-#include "statedump.h"
-#include "events.h"
-#include "upcall-utils.h"
+#include <glusterfs/dict.h>
+#include <glusterfs/hashfn.h>
+#include <glusterfs/list.h>
+#include <glusterfs/call-stub.h>
+#include <glusterfs/defaults.h>
+#include <glusterfs/common-utils.h>
+#include <glusterfs/compat-errno.h>
+#include <glusterfs/compat.h>
+#include <glusterfs/byte-order.h>
+#include <glusterfs/statedump.h>
+#include <glusterfs/events.h>
+#include <glusterfs/upcall-utils.h>
#include "afr-inode-read.h"
#include "afr-inode-write.h"
@@ -47,6 +45,56 @@ afr_quorum_errno(afr_private_t *priv)
return ENOTCONN;
}
+/*
+ * Decide whether the entry @name, whose parent is identified by @pargfid, is
+ * one of AFR's private directories for a client with process id @pid.
+ *
+ * Returns _gf_true when the name is private, _gf_false otherwise.
+ */
+gf_boolean_t
+afr_is_private_directory(afr_private_t *priv, uuid_t pargfid, const char *name,
+                         pid_t pid)
+{
+    gf_boolean_t is_heal_client = _gf_false;
+
+    /* Only entries whose parent is the root gfid can be private. */
+    if (!__is_root_gfid(pargfid))
+        return _gf_false;
+
+    /* For backward compatibility /.landfill is private. */
+    if (!strcmp(name, GF_REPLICATE_TRASH_DIR))
+        return _gf_true;
+
+    /* geo-rep needs to create/sync the private directory on the slave
+     * because it appears in the changelog. */
+    if (pid == GF_CLIENT_PID_GSYNCD)
+        return _gf_false;
+
+    is_heal_client = (pid == GF_CLIENT_PID_GLFS_HEAL ||
+                      pid == GF_CLIENT_PID_SELF_HEALD);
+    if (is_heal_client) {
+        /* Heal clients treat only the exact anonymous-inode dir as private. */
+        if (strcmp(name, priv->anon_inode_name) == 0)
+            return _gf_true;
+        return _gf_false;
+    }
+
+    /* For every other client the anonymous-inode dir prefix is private, which
+     * is needed for geo-rep to work. */
+    if (strncmp(name, AFR_ANON_DIR_PREFIX, strlen(AFR_ANON_DIR_PREFIX)) == 0)
+        return _gf_true;
+
+    return _gf_false;
+}
+
+/*
+ * Fill @replies (one byte per child) with 1 for every child whose reply in
+ * @local is valid and has op_ret == 0, and with 0 for every other child.
+ */
+void
+afr_fill_success_replies(afr_local_t *local, afr_private_t *priv,
+                         unsigned char *replies)
+{
+    int child = 0;
+
+    for (child = 0; child < priv->child_count; child++) {
+        replies[child] = (local->replies[child].valid &&
+                          local->replies[child].op_ret == 0)
+                             ? 1
+                             : 0;
+    }
+}
+
int
afr_fav_child_reset_sink_xattrs(void *opaque);
@@ -56,6 +104,581 @@ afr_fav_child_reset_sink_xattrs_cbk(int ret, call_frame_t *frame, void *opaque);
static void
afr_discover_done(call_frame_t *frame, xlator_t *this);
+/*
+ * finodelk callback used by afr_dom_lock_acquire(). @cookie carries the index
+ * of the child that replied. The per-child op_ret/op_errno are recorded, the
+ * child is flagged in dom_locked_nodes on success, and local->barrier is
+ * woken.
+ */
+int
+afr_dom_lock_acquire_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+                         int op_ret, int op_errno, dict_t *xdata)
+{
+    afr_local_t *local = frame->local;
+    afr_private_t *priv = this->private;
+    int child = (long)cookie;
+
+    local->cont.lk.dom_lock_op_ret[child] = op_ret;
+    local->cont.lk.dom_lock_op_errno[child] = op_errno;
+
+    if (op_ret >= 0) {
+        local->cont.lk.dom_locked_nodes[child] = 1;
+    } else {
+        gf_msg(this->name, GF_LOG_ERROR, op_errno, AFR_MSG_LK_HEAL_DOM,
+               "%s: Failed to acquire %s on %s",
+               uuid_utoa(local->fd->inode->gfid), AFR_LK_HEAL_DOM,
+               priv->children[child]->name);
+    }
+
+    syncbarrier_wake(&local->barrier);
+
+    return 0;
+}
+
+/*
+ * Take a write finodelk in the AFR_LK_HEAL_DOM domain on local->fd.
+ *
+ * A non-blocking F_SETLK is wound to all children first. If that does not
+ * give quorum, or if not every child was locked and one of them answered
+ * EAGAIN, whatever was acquired is released and a blocking F_SETLKW is wound
+ * instead.
+ *
+ * Returns 0 on success, -ENOMEM when a bookkeeping array cannot be allocated,
+ * or the negated afr_quorum_errno() when quorum is not reached even with the
+ * blocking attempt (any locks taken are released in that case).
+ */
+int
+afr_dom_lock_acquire(call_frame_t *frame)
+{
+    afr_local_t *local = NULL;
+    afr_private_t *priv = NULL;
+    struct gf_flock flock = {
+        0,
+    };
+    int i = 0;
+
+    priv = frame->this->private;
+    local = frame->local;
+    /* Size the array from the member being allocated, not from a sibling. */
+    local->cont.lk.dom_locked_nodes = GF_CALLOC(
+        priv->child_count, sizeof(*local->cont.lk.dom_locked_nodes),
+        gf_afr_mt_char);
+    if (!local->cont.lk.dom_locked_nodes) {
+        return -ENOMEM;
+    }
+    local->cont.lk.dom_lock_op_ret = GF_CALLOC(
+        priv->child_count, sizeof(*local->cont.lk.dom_lock_op_ret),
+        gf_afr_mt_int32_t);
+    if (!local->cont.lk.dom_lock_op_ret) {
+        return -ENOMEM; /* CALLOC'd members are freed in afr_local_cleanup. */
+    }
+    local->cont.lk.dom_lock_op_errno = GF_CALLOC(
+        priv->child_count, sizeof(*local->cont.lk.dom_lock_op_errno),
+        gf_afr_mt_int32_t);
+    if (!local->cont.lk.dom_lock_op_errno) {
+        return -ENOMEM; /* CALLOC'd members are freed in afr_local_cleanup. */
+    }
+    flock.l_type = F_WRLCK;
+
+    /* First attempt: non-blocking lock on every child. */
+    AFR_ONALL(frame, afr_dom_lock_acquire_cbk, finodelk, AFR_LK_HEAL_DOM,
+              local->fd, F_SETLK, &flock, NULL);
+
+    if (!afr_has_quorum(local->cont.lk.dom_locked_nodes, frame->this, NULL))
+        goto blocking_lock;
+
+    /*If any of the bricks returned EAGAIN, we still need blocking locks.*/
+    if (AFR_COUNT(local->cont.lk.dom_locked_nodes, priv->child_count) !=
+        priv->child_count) {
+        for (i = 0; i < priv->child_count; i++) {
+            if (local->cont.lk.dom_lock_op_ret[i] == -1 &&
+                local->cont.lk.dom_lock_op_errno[i] == EAGAIN)
+                goto blocking_lock;
+        }
+    }
+
+    return 0;
+
+blocking_lock:
+    /* Drop the partial set before retrying with blocking locks. */
+    afr_dom_lock_release(frame);
+    AFR_ONALL(frame, afr_dom_lock_acquire_cbk, finodelk, AFR_LK_HEAL_DOM,
+              local->fd, F_SETLKW, &flock, NULL);
+    if (!afr_has_quorum(local->cont.lk.dom_locked_nodes, frame->this, NULL)) {
+        afr_dom_lock_release(frame);
+        return -afr_quorum_errno(priv);
+    }
+
+    return 0;
+}
+
+/*
+ * finodelk callback used by afr_dom_lock_release(). @cookie carries the index
+ * of the child that replied. A failed unlock is logged; in every case the
+ * child is cleared from dom_locked_nodes and local->barrier is woken.
+ */
+int
+afr_dom_lock_release_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+                         int op_ret, int op_errno, dict_t *xdata)
+{
+    afr_local_t *local = frame->local;
+    afr_private_t *priv = this->private;
+    int i = (long)cookie;
+
+    if (op_ret < 0) {
+        /* The lock is fd based (it is wound on local->fd), so identify the
+         * file by the fd's gfid exactly like the acquire callback does. */
+        gf_msg(this->name, GF_LOG_ERROR, op_errno, AFR_MSG_LK_HEAL_DOM,
+               "%s: Failed to release %s on %s",
+               uuid_utoa(local->fd->inode->gfid), AFR_LK_HEAL_DOM,
+               priv->children[i]->name);
+    }
+    local->cont.lk.dom_locked_nodes[i] = 0;
+
+    syncbarrier_wake(&local->barrier);
+
+    return 0;
+}
+
+/*
+ * Release the AFR_LK_HEAL_DOM finodelk on every child currently flagged in
+ * local->cont.lk.dom_locked_nodes. Does nothing when no child is flagged or
+ * when the array was never allocated.
+ */
+void
+afr_dom_lock_release(call_frame_t *frame)
+{
+    afr_local_t *local = NULL;
+    afr_private_t *priv = NULL;
+    unsigned char *locked_on = NULL;
+    struct gf_flock flock = {
+        0,
+    };
+
+    local = frame->local;
+    priv = frame->this->private;
+    locked_on = local->cont.lk.dom_locked_nodes;
+    /* dom_locked_nodes is still NULL when afr_dom_lock_acquire() bailed out
+     * on its first allocation; there is nothing to unlock then. */
+    if (!locked_on)
+        return;
+    if (AFR_COUNT(locked_on, priv->child_count) == 0)
+        return;
+    flock.l_type = F_UNLCK;
+
+    AFR_ONLIST(locked_on, frame, afr_dom_lock_release_cbk, finodelk,
+               AFR_LK_HEAL_DOM, local->fd, F_SETLK, &flock, NULL);
+
+    return;
+}
+
+/*
+ * Free a lock-heal record: drop its references on xdata_req and fd (when
+ * set), free its per-child arrays and then the record itself. A NULL @info
+ * is ignored.
+ */
+static void
+afr_lk_heal_info_cleanup(afr_lk_heal_info_t *info)
+{
+    if (info == NULL)
+        return;
+
+    if (info->xdata_req != NULL)
+        dict_unref(info->xdata_req);
+
+    if (info->fd != NULL)
+        fd_unref(info->fd);
+
+    GF_FREE(info->locked_nodes);
+    GF_FREE(info->child_up_event_gen);
+    GF_FREE(info->child_down_event_gen);
+
+    GF_FREE(info);
+}
+
+/*
+ * Build a lock-heal record for the lk request held in @frame's local, publish
+ * it in the fd context of local->fd and append it to priv->saved_locks.
+ *
+ * Returns 0 on success. On any failure (allocation, dict copy, or missing fd
+ * context) the partially built record is freed, an error is logged and
+ * -ENOMEM is returned.
+ */
+static int
+afr_add_lock_to_saved_locks(call_frame_t *frame, xlator_t *this)
+{
+    afr_private_t *priv = this->private;
+    afr_local_t *local = frame->local;
+    afr_lk_heal_info_t *info = NULL;
+    afr_fd_ctx_t *fd_ctx = NULL;
+    int ret = -ENOMEM;
+
+    /* GF_CALLOC takes (count, size, type), as used in afr_dom_lock_acquire. */
+    info = GF_CALLOC(1, sizeof(*info), gf_afr_mt_lk_heal_info_t);
+    if (!info) {
+        goto cleanup;
+    }
+    INIT_LIST_HEAD(&info->pos);
+    info->fd = fd_ref(local->fd);
+    info->cmd = local->cont.lk.cmd;
+    info->pid = frame->root->pid;
+    info->flock = local->cont.lk.user_flock;
+    info->xdata_req = dict_copy_with_ref(local->xdata_req, NULL);
+    if (!info->xdata_req) {
+        goto cleanup;
+    }
+    info->lk_owner = frame->root->lk_owner;
+    info->locked_nodes = GF_MALLOC(
+        sizeof(*info->locked_nodes) * priv->child_count, gf_afr_mt_char);
+    if (!info->locked_nodes) {
+        goto cleanup;
+    }
+    memcpy(info->locked_nodes, local->cont.lk.locked_nodes,
+           sizeof(*info->locked_nodes) * priv->child_count);
+    info->child_up_event_gen = GF_CALLOC(priv->child_count,
+                                         sizeof(*info->child_up_event_gen),
+                                         gf_afr_mt_int32_t);
+    if (!info->child_up_event_gen) {
+        goto cleanup;
+    }
+    info->child_down_event_gen = GF_CALLOC(priv->child_count,
+                                           sizeof(*info->child_down_event_gen),
+                                           gf_afr_mt_int32_t);
+    if (!info->child_down_event_gen) {
+        goto cleanup;
+    }
+
+    LOCK(&local->fd->lock);
+    {
+        fd_ctx = __afr_fd_ctx_get(local->fd, this);
+        if (fd_ctx)
+            fd_ctx->lk_heal_info = info;
+    }
+    UNLOCK(&local->fd->lock);
+    if (!fd_ctx) {
+        goto cleanup;
+    }
+
+    LOCK(&priv->lock);
+    {
+        list_add_tail(&info->pos, &priv->saved_locks);
+    }
+    UNLOCK(&priv->lock);
+
+    return 0;
+cleanup:
+    gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_LK_HEAL_DOM,
+           "%s: Failed to add lock to healq",
+           uuid_utoa(local->fd->inode->gfid));
+    /* Every path that jumps here has fd_ctx == NULL, i.e. the record was
+     * never published in the fd context, so only the record itself has to be
+     * released. afr_lk_heal_info_cleanup() ignores a NULL record. */
+    afr_lk_heal_info_cleanup(info);
+    return ret;
+}
+
+/*
+ * Drop the lock-heal record attached to local->fd when its range (l_start,
+ * l_whence, l_len) equals that of the user flock in @local: the record is
+ * unlinked from its list under priv->lock, freed, and cleared from the fd
+ * context.
+ *
+ * Returns 0 on success; logs an error and returns -EINVAL when there is no
+ * fd context, no record, or the ranges differ.
+ */
+static int
+afr_remove_lock_from_saved_locks(afr_local_t *local, xlator_t *this)
+{
+    afr_private_t *priv = this->private;
+    struct gf_flock flock = local->cont.lk.user_flock;
+    afr_lk_heal_info_t *info = NULL;
+    afr_fd_ctx_t *fd_ctx = NULL;
+    gf_boolean_t same_range = _gf_false;
+
+    fd_ctx = afr_fd_ctx_get(local->fd, this);
+    if (fd_ctx && fd_ctx->lk_heal_info) {
+        info = fd_ctx->lk_heal_info;
+        /*TODO: Compare lkowners too.*/
+        same_range = (info->flock.l_start == flock.l_start) &&
+                     (info->flock.l_whence == flock.l_whence) &&
+                     (info->flock.l_len == flock.l_len);
+    }
+
+    if (!same_range) {
+        gf_msg(this->name, GF_LOG_ERROR, EINVAL, AFR_MSG_LK_HEAL_DOM,
+               "%s: Failed to remove lock from healq",
+               uuid_utoa(local->fd->inode->gfid));
+        return -EINVAL;
+    }
+
+    LOCK(&priv->lock);
+    {
+        list_del(&fd_ctx->lk_heal_info->pos);
+    }
+    UNLOCK(&priv->lock);
+
+    afr_lk_heal_info_cleanup(info);
+    fd_ctx->lk_heal_info = NULL;
+
+    return 0;
+}
+
+/*
+ * lk callback for the heal wind in afr_lock_heal_do(). @cookie carries the
+ * index of the replying child. The reply is recorded in local->replies, a
+ * failure is logged, and local->barrier is woken.
+ */
+int
+afr_lock_heal_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+                  int32_t op_ret, int32_t op_errno, struct gf_flock *lock,
+                  dict_t *xdata)
+{
+    afr_local_t *local = frame->local;
+    int child = (long)cookie;
+
+    local->replies[child].valid = 1;
+    local->replies[child].op_ret = op_ret;
+    local->replies[child].op_errno = op_errno;
+
+    if (op_ret != 0)
+        gf_msg(this->name, GF_LOG_ERROR, op_errno, AFR_MSG_LK_HEAL_DOM,
+               "Failed to heal lock on child %d for %s", child,
+               uuid_utoa(local->fd->inode->gfid));
+
+    syncbarrier_wake(&local->barrier);
+
+    return 0;
+}
+
+/*
+ * lk (F_GETLK) callback used by afr_does_lk_owner_match(). @cookie carries
+ * the index of the replying child. The reply is recorded in local->replies;
+ * on success the returned flock is stored in getlk_rsp[child], otherwise an
+ * error is logged. local->barrier is woken in both cases.
+ */
+int
+afr_getlk_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+              int32_t op_errno, struct gf_flock *lock, dict_t *xdata)
+{
+    afr_local_t *local = frame->local;
+    int child = (long)cookie;
+
+    local->replies[child].valid = 1;
+    local->replies[child].op_ret = op_ret;
+    local->replies[child].op_errno = op_errno;
+
+    if (op_ret == 0) {
+        local->cont.lk.getlk_rsp[child] = *lock;
+    } else {
+        gf_msg(this->name, GF_LOG_ERROR, op_errno, AFR_MSG_LK_HEAL_DOM,
+               "Failed getlk for %s", uuid_utoa(local->fd->inode->gfid));
+    }
+
+    syncbarrier_wake(&local->barrier);
+
+    return 0;
+}
+
+/*
+ * Query (F_GETLK) the children on which @info's lock is recorded as held and
+ * check that every lock reported back belongs to @info->lk_owner.
+ *
+ * Returns _gf_false when the reply buffer cannot be allocated, when no child
+ * answered successfully, or when some child reports a lock (l_type other
+ * than F_UNLCK) with a different owner; _gf_true otherwise. The replies in
+ * @frame's local are wiped and the reply buffer freed before returning.
+ */
+static gf_boolean_t
+afr_does_lk_owner_match(call_frame_t *frame, afr_private_t *priv,
+                        afr_lk_heal_info_t *info)
+{
+    int i = 0;
+    afr_local_t *local = frame->local;
+    struct gf_flock flock = {
+        0,
+    };
+    gf_boolean_t ret = _gf_true;
+    char *wind_on = alloca0(priv->child_count);
+    unsigned char *success_replies = alloca0(priv->child_count);
+
+    local->cont.lk.getlk_rsp = GF_CALLOC(priv->child_count,
+                                         sizeof(*local->cont.lk.getlk_rsp),
+                                         gf_afr_mt_gf_lock);
+    if (!local->cont.lk.getlk_rsp) {
+        /* afr_getlk_cbk() writes each reply into this array, so nothing can
+         * be wound or verified without it. Nothing was wound yet, hence no
+         * replies to wipe. */
+        return _gf_false;
+    }
+
+    flock = info->flock;
+    for (i = 0; i < priv->child_count; i++) {
+        if (info->locked_nodes[i])
+            wind_on[i] = 1;
+    }
+
+    AFR_ONLIST(wind_on, frame, afr_getlk_cbk, lk, info->fd, F_GETLK, &flock,
+               info->xdata_req);
+
+    afr_fill_success_replies(local, priv, success_replies);
+    if (AFR_COUNT(success_replies, priv->child_count) == 0) {
+        ret = _gf_false;
+        goto out;
+    }
+
+    for (i = 0; i < priv->child_count; i++) {
+        if (!local->replies[i].valid || local->replies[i].op_ret != 0)
+            continue;
+        if (local->cont.lk.getlk_rsp[i].l_type == F_UNLCK)
+            continue;
+        /*TODO: Do we really need to compare lkowner if F_UNLCK is true?*/
+        if (!is_same_lkowner(&local->cont.lk.getlk_rsp[i].l_owner,
+                             &info->lk_owner)) {
+            ret = _gf_false;
+            break;
+        }
+    }
+out:
+    afr_local_replies_wipe(local, priv);
+    GF_FREE(local->cont.lk.getlk_rsp);
+    local->cont.lk.getlk_rsp = NULL;
+    return ret;
+}
+
+/*
+ * Under fd->lock, flag the AFR context of @fd as bad and detach its lock-heal
+ * record pointer. A NULL @fd, or an fd without AFR context, is left alone.
+ */
+static void
+afr_mark_fd_bad(fd_t *fd, xlator_t *this)
+{
+    afr_fd_ctx_t *ctx = NULL;
+
+    if (fd == NULL)
+        return;
+
+    LOCK(&fd->lock);
+    {
+        ctx = __afr_fd_ctx_get(fd, this);
+        if (ctx != NULL) {
+            ctx->is_fd_bad = _gf_true;
+            ctx->lk_heal_info = NULL;
+        }
+    }
+    UNLOCK(&fd->lock);
+}
+
+/*
+ * Under priv->lock, unlink @info from whatever list it is on and append it
+ * to the tail of priv->lk_healq.
+ */
+static void
+afr_add_lock_to_lkhealq(afr_private_t *priv, afr_lk_heal_info_t *info)
+{
+    struct list_head *entry = &info->pos;
+
+    LOCK(&priv->lock);
+    list_del(entry);
+    list_add_tail(entry, &priv->lk_healq);
+    UNLOCK(&priv->lock);
+}
+
+/*
+ * Attempt to heal the saved lock described by @info: re-acquire it on every
+ * child where info->locked_nodes is not set.
+ *
+ * The frame takes on the pid and lk-owner stored in @info, then the
+ * AFR_LK_HEAL_DOM lock is taken and the current lock owner on the bricks is
+ * verified before the lk is wound. A child is marked healed only if its
+ * child-up event generation did not change while the heal was in flight and
+ * is newer than its last child-down generation; in that case @info is moved
+ * back to priv->saved_locks. If no child was healed, @info is queued on
+ * priv->lk_healq again for a later retry.
+ */
+static void
+afr_lock_heal_do(call_frame_t *frame, afr_private_t *priv,
+                 afr_lk_heal_info_t *info)
+{
+    int i = 0;
+    int op_errno = 0;
+    int32_t *current_event_gen = NULL;
+    afr_local_t *local = frame->local;
+    xlator_t *this = frame->this;
+    char *wind_on = alloca0(priv->child_count);
+    gf_boolean_t retry = _gf_true;
+
+    frame->root->pid = info->pid;
+    lk_owner_copy(&frame->root->lk_owner, &info->lk_owner);
+
+    op_errno = -afr_dom_lock_acquire(frame);
+    if ((op_errno != 0)) {
+        goto release;
+    }
+
+    if (!afr_does_lk_owner_match(frame, priv, info)) {
+        gf_msg(this->name, GF_LOG_WARNING, 0, AFR_MSG_LK_HEAL_DOM,
+               "Ignoring lock heal for %s since lk-onwers mismatch. "
+               "Lock possibly pre-empted by another client.",
+               uuid_utoa(info->fd->inode->gfid));
+        goto release;
+    }
+
+    for (i = 0; i < priv->child_count; i++) {
+        if (info->locked_nodes[i])
+            continue;
+        wind_on[i] = 1;
+    }
+
+    /* Snapshot the child-up generations before winding. The buffer must hold
+     * child_count int32_t entries (not child_count bytes), since that is how
+     * much the memcpy below writes. */
+    current_event_gen = alloca(priv->child_count * sizeof(*current_event_gen));
+    memcpy(current_event_gen, info->child_up_event_gen,
+           priv->child_count * sizeof(*current_event_gen));
+    AFR_ONLIST(wind_on, frame, afr_lock_heal_cbk, lk, info->fd, info->cmd,
+               &info->flock, info->xdata_req);
+
+    LOCK(&priv->lock);
+    {
+        for (i = 0; i < priv->child_count; i++) {
+            if (!wind_on[i])
+                continue;
+            if ((!local->replies[i].valid) || (local->replies[i].op_ret != 0)) {
+                continue;
+            }
+
+            if ((current_event_gen[i] == info->child_up_event_gen[i]) &&
+                (current_event_gen[i] > info->child_down_event_gen[i])) {
+                info->locked_nodes[i] = 1;
+                retry = _gf_false;
+                list_del_init(&info->pos);
+                list_add_tail(&info->pos, &priv->saved_locks);
+            } else {
+                /*We received subsequent child up/down events while heal was in
+                 * progress; don't mark child as healed. Attempt again on the
+                 * new child up*/
+                gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_LK_HEAL_DOM,
+                       "Event gen mismatch: skipped healing lock on child %d "
+                       "for %s.",
+                       i, uuid_utoa(info->fd->inode->gfid));
+            }
+        }
+    }
+    UNLOCK(&priv->lock);
+
+release:
+    afr_dom_lock_release(frame);
+    if (retry)
+        afr_add_lock_to_lkhealq(priv, info);
+    return;
+}
+
+/*
+ * Completion callback passed to synctask_new() together with afr_lock_heal():
+ * destroys the call stack that @frame belongs to. Always returns 0.
+ */
+static int
+afr_lock_heal_done(int ret, call_frame_t *frame, void *opaque)
+{
+    STACK_DESTROY(frame->root);
+
+    return 0;
+}
+
+/*
+ * Synctask body launched by __afr_lock_heal_synctask(); @opaque is the frame
+ * created there.
+ *
+ * The whole of priv->lk_healq is moved onto a private list under priv->lock
+ * and each entry is then healed with afr_lock_heal_do() on a copy of the
+ * frame that is reset between entries. If the reset leaves the copy without
+ * a local, processing is aborted and the entries not yet handled are put back
+ * on priv->lk_healq.
+ *
+ * Returns 0 on success, ENOMEM if the frame cannot be copied, ENOTCONN if
+ * processing was aborted.
+ */
+static int
+afr_lock_heal(void *opaque)
+{
+    call_frame_t *frame = (call_frame_t *)opaque;
+    call_frame_t *iter_frame = NULL;
+    xlator_t *this = frame->this;
+    afr_private_t *priv = this->private;
+    afr_lk_heal_info_t *info = NULL;
+    afr_lk_heal_info_t *tmp = NULL;
+    struct list_head healq = {
+        0,
+    };
+    int ret = 0;
+
+    iter_frame = afr_copy_frame(frame);
+    if (!iter_frame) {
+        return ENOMEM;
+    }
+
+    INIT_LIST_HEAD(&healq);
+    LOCK(&priv->lock);
+    {
+        list_splice_init(&priv->lk_healq, &healq);
+    }
+    UNLOCK(&priv->lock);
+
+    list_for_each_entry_safe(info, tmp, &healq, pos)
+    {
+        GF_ASSERT((AFR_COUNT(info->locked_nodes, priv->child_count) <
+                   priv->child_count));
+        ((afr_local_t *)(iter_frame->local))->fd = fd_ref(info->fd);
+        afr_lock_heal_do(iter_frame, priv, info);
+        AFR_STACK_RESET(iter_frame);
+        if (iter_frame->local == NULL) {
+            ret = ENOTCONN;
+            gf_msg(frame->this->name, GF_LOG_ERROR, ENOTCONN,
+                   AFR_MSG_LK_HEAL_DOM,
+                   "Aborting processing of lk_healq."
+                   "Healing will be reattempted on next child up for locks "
+                   "that are still in quorum.");
+            LOCK(&priv->lock);
+            {
+                /* healq lives on this function's stack: hand its remaining
+                 * entries back to priv->lk_healq instead of linking the
+                 * list head itself, which would dangle after return. */
+                list_splice_init(&healq, &priv->lk_healq);
+            }
+            UNLOCK(&priv->lock);
+            break;
+        }
+    }
+
+    AFR_STACK_DESTROY(iter_frame);
+    return ret;
+}
+
+/*
+ * Queue every saved lock for healing after a child-up of @child and launch
+ * the afr_lock_heal() synctask. Does nothing (returns 0) when
+ * priv->shd.iamshd is set.
+ *
+ * Each entry of priv->saved_locks gets its child_up_event_gen[@child] set to
+ * the current priv->event_generation and is moved to priv->lk_healq.
+ * NOTE(review): the lists are modified here without taking priv->lock;
+ * presumably the caller already holds it - confirm against the caller.
+ *
+ * Returns 0 on success, -1 if no frame could be created, or the non-zero
+ * result of synctask_new() on launch failure.
+ */
+static int
+__afr_lock_heal_synctask(xlator_t *this, afr_private_t *priv, int child)
+{
+    int ret = 0;
+    call_frame_t *frame = NULL;
+    afr_lk_heal_info_t *info = NULL;
+    afr_lk_heal_info_t *tmp = NULL;
+
+    if (priv->shd.iamshd)
+        return 0;
+
+    list_for_each_entry_safe(info, tmp, &priv->saved_locks, pos)
+    {
+        info->child_up_event_gen[child] = priv->event_generation;
+        list_del_init(&info->pos);
+        list_add_tail(&info->pos, &priv->lk_healq);
+    }
+
+    frame = create_frame(this, this->ctx->pool);
+    if (!frame)
+        return -1;
+
+    ret = synctask_new(this->ctx->env, afr_lock_heal, afr_lock_heal_done, frame,
+                       frame);
+    if (ret) {
+        gf_msg(this->name, GF_LOG_ERROR, ENOMEM, AFR_MSG_LK_HEAL_DOM,
+               "Failed to launch lock heal synctask");
+        /* The task never started, so afr_lock_heal_done() will not run to
+         * destroy the frame; release it here to avoid leaking it. */
+        STACK_DESTROY(frame->root);
+    }
+
+    return ret;
+}
+
+/*
+ * Account for @child going down in every saved lock: record the current
+ * event generation as the child's down generation and clear the child from
+ * locked_nodes. A lock that no longer has quorum is given up: its fd is
+ * marked bad and its record is unlinked and freed. Does nothing when
+ * priv->shd.iamshd is set. Always returns 0.
+ */
+static int
+__afr_mark_pending_lk_heal(xlator_t *this, afr_private_t *priv, int child)
+{
+    afr_lk_heal_info_t *cur = NULL;
+    afr_lk_heal_info_t *next = NULL;
+
+    if (priv->shd.iamshd)
+        return 0;
+
+    list_for_each_entry_safe(cur, next, &priv->saved_locks, pos)
+    {
+        cur->child_down_event_gen[child] = priv->event_generation;
+        if (cur->locked_nodes[child] == 1)
+            cur->locked_nodes[child] = 0;
+
+        if (afr_has_quorum(cur->locked_nodes, this, NULL))
+            continue;
+
+        /* Since the lock was lost on quorum no. of nodes, we should
+         * not attempt to heal it anymore. Some other client could have
+         * acquired the lock, modified data and released it and this
+         * client wouldn't know about it if we heal it.*/
+        afr_mark_fd_bad(cur->fd, this);
+        list_del(&cur->pos);
+        afr_lk_heal_info_cleanup(cur);
+        /* We're not winding an unlock on the node where the lock is still
+         * present because when fencing logic switches over to the new
+         * client (since we marked the fd bad), it should preempt any
+         * existing lock. */
+    }
+
+    return 0;
+}
+
gf_boolean_t
afr_is_consistent_io_possible(afr_local_t *local, afr_private_t *priv,
int32_t *op_errno)
@@ -70,6 +693,19 @@ afr_is_consistent_io_possible(afr_local_t *local, afr_private_t *priv,
return _gf_true;
}
+/*
+ * Return _gf_true when @xdata carries GF_LOCK_MODE set to GF_LK_MANDATORY;
+ * _gf_false when the key cannot be read or holds any other value.
+ */
+gf_boolean_t
+afr_is_lock_mode_mandatory(dict_t *xdata)
+{
+    uint32_t mode = GF_LK_ADVISORY;
+
+    if (dict_get_uint32(xdata, GF_LOCK_MODE, &mode) != 0)
+        return _gf_false;
+
+    return (mode == GF_LK_MANDATORY) ? _gf_true : _gf_false;
+}
+
call_frame_t *
afr_copy_frame(call_frame_t *base)
{
@@ -98,21 +734,24 @@ afr_is_possibly_under_txn(afr_transaction_type type, afr_local_t *local,
int tmp = 0;
afr_private_t *priv = NULL;
GF_UNUSED char *key = NULL;
+ int keylen = 0;
priv = this->private;
- if (type == AFR_ENTRY_TRANSACTION)
+ if (type == AFR_ENTRY_TRANSACTION) {
key = GLUSTERFS_PARENT_ENTRYLK;
- else if (type == AFR_DATA_TRANSACTION)
+ keylen = SLEN(GLUSTERFS_PARENT_ENTRYLK);
+ } else if (type == AFR_DATA_TRANSACTION) {
/*FIXME: Use GLUSTERFS_INODELK_DOM_COUNT etc. once
* pl_inodelk_xattr_fill supports separate keys for different
* domains.*/
key = GLUSTERFS_INODELK_COUNT;
-
+ keylen = SLEN(GLUSTERFS_INODELK_COUNT);
+ }
for (i = 0; i < priv->child_count; i++) {
if (!local->replies[i].xdata)
continue;
- if (dict_get_int32(local->replies[i].xdata, key, &tmp) == 0)
+ if (dict_get_int32n(local->replies[i].xdata, key, keylen, &tmp) == 0)
if (tmp)
return _gf_true;
}
@@ -260,11 +899,7 @@ __afr_set_in_flight_sb_status(xlator_t *this, afr_local_t *local,
count = gf_bits_count(tmp_map);
- if (count == 1)
- index = gf_bits_index(tmp_map);
-
for (i = 0; i < priv->child_count; i++) {
- mask = 0;
if (!local->transaction.failed_subvols[i])
continue;
@@ -278,25 +913,27 @@ __afr_set_in_flight_sb_status(xlator_t *this, afr_local_t *local,
switch (txn_type) {
case AFR_METADATA_TRANSACTION:
if ((metadatamap_old != 0) && (metadatamap == 0) && (count == 1)) {
+ index = gf_bits_index(tmp_map);
local->transaction.in_flight_sb_errno = local->replies[index]
.op_errno;
local->transaction.in_flight_sb = _gf_true;
metadatamap |= (1 << index);
}
if (metadatamap_old != metadatamap) {
- event = 0;
+ __afr_inode_need_refresh_set(inode, this);
}
break;
case AFR_DATA_TRANSACTION:
if ((datamap_old != 0) && (datamap == 0) && (count == 1)) {
+ index = gf_bits_index(tmp_map);
local->transaction.in_flight_sb_errno = local->replies[index]
.op_errno;
local->transaction.in_flight_sb = _gf_true;
datamap |= (1 << index);
}
if (datamap_old != datamap)
- event = 0;
+ __afr_inode_need_refresh_set(inode, this);
break;
default:
@@ -460,34 +1097,6 @@ out:
}
int
-__afr_inode_event_gen_reset_small(inode_t *inode, xlator_t *this)
-{
- int ret = -1;
- uint16_t datamap = 0;
- uint16_t metadatamap = 0;
- uint32_t event = 0;
- uint64_t val = 0;
- afr_inode_ctx_t *ctx = NULL;
-
- ret = __afr_inode_ctx_get(this, inode, &ctx);
- if (ret)
- return ret;
-
- val = ctx->read_subvol;
-
- metadatamap = (val & 0x000000000000ffff) >> 0;
- datamap = (val & 0x00000000ffff0000) >> 16;
- event = 0;
-
- val = ((uint64_t)metadatamap) | (((uint64_t)datamap) << 16) |
- (((uint64_t)event) << 32);
-
- ctx->read_subvol = val;
-
- return ret;
-}
-
-int
__afr_inode_read_subvol_get(inode_t *inode, xlator_t *this, unsigned char *data,
unsigned char *metadata, int *event_p)
{
@@ -558,22 +1167,6 @@ out:
}
int
-__afr_inode_event_gen_reset(inode_t *inode, xlator_t *this)
-{
- afr_private_t *priv = NULL;
- int ret = -1;
-
- priv = this->private;
-
- if (priv->child_count <= 16)
- ret = __afr_inode_event_gen_reset_small(inode, this);
- else
- ret = -1;
-
- return ret;
-}
-
-int
afr_inode_read_subvol_get(inode_t *inode, xlator_t *this, unsigned char *data,
unsigned char *metadata, int *event_p)
{
@@ -639,12 +1232,11 @@ afr_inode_get_readable(call_frame_t *frame, inode_t *inode, xlator_t *this,
return 0;
}
-int
+static int
afr_inode_split_brain_choice_get(inode_t *inode, xlator_t *this,
int *spb_choice)
{
int ret = -1;
-
GF_VALIDATE_OR_GOTO(this->name, inode, out);
LOCK(&inode->lock);
@@ -656,6 +1248,40 @@ out:
return ret;
}
+/*
+ * frame is used to get the favourite policy. Since
+ * afr_inode_split_brain_choice_get was called with afr_open, it is possible to
+ * have a frame without local->replies. In that case, frame is passed as
+ * NULL, so this function handles the NULL frame case.
+ */
+int
+afr_split_brain_read_subvol_get(inode_t *inode, xlator_t *this,
+ call_frame_t *frame, int *spb_subvol)
+{
+ int ret = -1;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+
+ GF_VALIDATE_OR_GOTO("afr", this, out);
+ GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+ GF_VALIDATE_OR_GOTO(this->name, inode, out);
+ GF_VALIDATE_OR_GOTO(this->name, spb_subvol, out);
+
+ priv = this->private;
+
+ ret = afr_inode_split_brain_choice_get(inode, this, spb_subvol);
+ if (*spb_subvol < 0 && priv->fav_child_policy && frame && frame->local) {
+ local = frame->local;
+ *spb_subvol = afr_sh_get_fav_by_policy(this, local->replies, inode,
+ NULL);
+ if (*spb_subvol >= 0) {
+ ret = 0;
+ }
+ }
+
+out:
+ return ret;
+}
int
afr_inode_read_subvol_set(inode_t *inode, xlator_t *this, unsigned char *data,
unsigned char *metadata, int event)
@@ -722,30 +1348,22 @@ out:
return need_refresh;
}
-static int
-afr_inode_need_refresh_set(inode_t *inode, xlator_t *this)
+int
+__afr_inode_need_refresh_set(inode_t *inode, xlator_t *this)
{
int ret = -1;
afr_inode_ctx_t *ctx = NULL;
- GF_VALIDATE_OR_GOTO(this->name, inode, out);
-
- LOCK(&inode->lock);
- {
- ret = __afr_inode_ctx_get(this, inode, &ctx);
- if (ret)
- goto unlock;
-
+ ret = __afr_inode_ctx_get(this, inode, &ctx);
+ if (ret == 0) {
ctx->need_refresh = _gf_true;
}
-unlock:
- UNLOCK(&inode->lock);
-out:
+
return ret;
}
int
-afr_inode_event_gen_reset(inode_t *inode, xlator_t *this)
+afr_inode_need_refresh_set(inode_t *inode, xlator_t *this)
{
int ret = -1;
@@ -753,7 +1371,7 @@ afr_inode_event_gen_reset(inode_t *inode, xlator_t *this)
LOCK(&inode->lock);
{
- ret = __afr_inode_event_gen_reset(inode, this);
+ ret = __afr_inode_need_refresh_set(inode, this);
}
UNLOCK(&inode->lock);
out:
@@ -773,6 +1391,7 @@ afr_spb_choice_timeout_cancel(xlator_t *this, inode_t *inode)
{
ret = __afr_inode_ctx_get(this, inode, &ctx);
if (ret < 0 || !ctx) {
+ UNLOCK(&inode->lock);
gf_msg(this->name, GF_LOG_WARNING, 0,
AFR_MSG_SPLIT_BRAIN_CHOICE_ERROR,
"Failed to cancel split-brain choice timer.");
@@ -785,8 +1404,8 @@ afr_spb_choice_timeout_cancel(xlator_t *this, inode_t *inode)
}
ret = 0;
}
-out:
UNLOCK(&inode->lock);
+out:
return ret;
}
@@ -818,7 +1437,6 @@ afr_set_split_brain_choice(int ret, call_frame_t *frame, void *opaque)
gf_boolean_t timer_set = _gf_false;
gf_boolean_t timer_cancelled = _gf_false;
gf_boolean_t timer_reset = _gf_false;
- gf_boolean_t need_invalidate = _gf_true;
int old_spb_choice = -1;
frame = data->frame;
@@ -862,10 +1480,11 @@ afr_set_split_brain_choice(int ret, call_frame_t *frame, void *opaque)
{
ret = __afr_inode_ctx_get(this, inode, &ctx);
if (ret) {
+ UNLOCK(&inode->lock);
gf_msg(this->name, GF_LOG_ERROR, 0,
AFR_MSG_SPLIT_BRAIN_CHOICE_ERROR,
"Failed to get inode_ctx for %s", loc->name);
- goto unlock;
+ goto post_unlock;
}
old_spb_choice = ctx->spb_choice;
@@ -929,10 +1548,10 @@ afr_set_split_brain_choice(int ret, call_frame_t *frame, void *opaque)
timer_set = _gf_true;
if (timer_reset && !ctx->timer)
timer_cancelled = _gf_true;
- need_invalidate = _gf_false;
}
unlock:
UNLOCK(&inode->lock);
+post_unlock:
if (!timer_set)
inode_unref(inode);
if (timer_cancelled)
@@ -942,8 +1561,7 @@ unlock:
* reads from an older cached value despite a change in spb_choice to
* a new value.
*/
- if (need_invalidate)
- inode_invalidate(inode);
+ inode_invalidate(inode);
out:
GF_FREE(data);
AFR_STACK_UNWIND(setxattr, frame, ret, op_errno, NULL);
@@ -988,7 +1606,7 @@ afr_accuse_smallfiles(xlator_t *this, struct afr_reply *replies,
for (i = 0; i < priv->child_count; i++) {
if (replies[i].valid && replies[i].xdata &&
- dict_get(replies[i].xdata, GLUSTERFS_BAD_INODE))
+ dict_get_sizen(replies[i].xdata, GLUSTERFS_BAD_INODE))
continue;
if (data_accused[i])
continue;
@@ -1037,7 +1655,7 @@ afr_readables_fill(call_frame_t *frame, xlator_t *this, inode_t *inode,
if (replies) { /* Lookup */
if (!replies[i].valid || replies[i].op_ret == -1 ||
(replies[i].xdata &&
- dict_get(replies[i].xdata, GLUSTERFS_BAD_INODE))) {
+ dict_get_sizen(replies[i].xdata, GLUSTERFS_BAD_INODE))) {
data_readable[i] = 0;
metadata_readable[i] = 0;
continue;
@@ -1050,6 +1668,8 @@ afr_readables_fill(call_frame_t *frame, xlator_t *this, inode_t *inode,
ia_type = inode->ia_type;
}
+ if (!xdata)
+ continue; /* mkdir_cbk sends NULL xdata_rsp. */
afr_accused_fill(this, xdata, data_accused,
(ia_type == IA_IFDIR) ? AFR_ENTRY_TRANSACTION
: AFR_DATA_TRANSACTION);
@@ -1153,18 +1773,12 @@ ret:
}
gf_boolean_t
-afr_selfheal_enabled(xlator_t *this)
+afr_selfheal_enabled(const xlator_t *this)
{
- afr_private_t *priv = NULL;
- gf_boolean_t data = _gf_false;
- int ret = 0;
+ const afr_private_t *priv = this->private;
- priv = this->private;
-
- ret = gf_string2boolean(priv->data_self_heal, &data);
- GF_ASSERT(!ret);
-
- return data || priv->metadata_self_heal || priv->entry_self_heal;
+ return priv->data_self_heal || priv->metadata_self_heal ||
+ priv->entry_self_heal;
}
int
@@ -1177,7 +1791,6 @@ afr_txn_refresh_done(call_frame_t *frame, xlator_t *this, int err)
inode_t *inode = NULL;
int event_generation = 0;
int read_subvol = -1;
- int op_errno = ENOMEM;
int ret = 0;
local = frame->local;
@@ -1193,7 +1806,7 @@ afr_txn_refresh_done(call_frame_t *frame, xlator_t *this, int err)
ret = afr_inode_get_readable(frame, inode, this, local->readable,
&event_generation, local->transaction.type);
- if (ret == -EIO || (local->is_read_txn && !event_generation)) {
+ if (ret == -EIO) {
/* No readable subvolume even after refresh ==> splitbrain.*/
if (!priv->fav_child_policy) {
err = EIO;
@@ -1206,18 +1819,12 @@ afr_txn_refresh_done(call_frame_t *frame, xlator_t *this, int err)
goto refresh_done;
}
- heal_frame = copy_frame(frame);
+ heal_frame = afr_frame_create(this, NULL);
if (!heal_frame) {
err = EIO;
goto refresh_done;
}
- heal_frame->root->pid = GF_CLIENT_PID_SELF_HEALD;
- heal_local = AFR_FRAME_INIT(heal_frame, op_errno);
- if (!heal_local) {
- err = EIO;
- AFR_STACK_DESTROY(heal_frame);
- goto refresh_done;
- }
+ heal_local = heal_frame->local;
heal_local->xdata_req = dict_new();
if (!heal_local->xdata_req) {
err = EIO;
@@ -1238,18 +1845,6 @@ refresh_done:
return 0;
}
-static void
-afr_fill_success_replies(afr_local_t *local, afr_private_t *priv,
- unsigned char *replies)
-{
- int i = 0;
-
- for (i = 0; i < priv->child_count; i++) {
- if (local->replies[i].valid && local->replies[i].op_ret == 0)
- replies[i] = 1;
- }
-}
-
int
afr_inode_refresh_done(call_frame_t *frame, xlator_t *this, int error)
{
@@ -1259,7 +1854,6 @@ afr_inode_refresh_done(call_frame_t *frame, xlator_t *this, int error)
gf_boolean_t start_heal = _gf_false;
afr_local_t *heal_local = NULL;
unsigned char *success_replies = NULL;
- int op_errno = ENOMEM;
int ret = 0;
if (error != 0) {
@@ -1271,32 +1865,32 @@ afr_inode_refresh_done(call_frame_t *frame, xlator_t *this, int error)
success_replies = alloca0(priv->child_count);
afr_fill_success_replies(local, priv, success_replies);
- if (!afr_has_quorum(success_replies, this)) {
- error = afr_final_errno(frame->local, this->private);
- if (!error)
- error = afr_quorum_errno(priv);
- goto refresh_done;
- }
-
if (priv->thin_arbiter_count && local->is_read_txn &&
AFR_COUNT(success_replies, priv->child_count) != priv->child_count) {
/* We need to query the good bricks and/or thin-arbiter.*/
+ if (success_replies[0]) {
+ local->read_txn_query_child = AFR_CHILD_ZERO;
+ } else if (success_replies[1]) {
+ local->read_txn_query_child = AFR_CHILD_ONE;
+ }
error = EINVAL;
goto refresh_done;
}
+ if (!afr_has_quorum(success_replies, this, frame)) {
+ error = afr_final_errno(frame->local, this->private);
+ if (!error)
+ error = afr_quorum_errno(priv);
+ goto refresh_done;
+ }
+
ret = afr_replies_interpret(frame, this, local->refreshinode, &start_heal);
if (ret && afr_selfheal_enabled(this) && start_heal) {
- heal_frame = copy_frame(frame);
+ heal_frame = afr_frame_create(this, NULL);
if (!heal_frame)
goto refresh_done;
- heal_frame->root->pid = GF_CLIENT_PID_SELF_HEALD;
- heal_local = AFR_FRAME_INIT(heal_frame, op_errno);
- if (!heal_local) {
- AFR_STACK_DESTROY(heal_frame);
- goto refresh_done;
- }
+ heal_local = heal_frame->local;
heal_local->refreshinode = inode_ref(local->refreshinode);
heal_local->heal_frame = heal_frame;
if (!afr_throttled_selfheal(heal_frame, this)) {
@@ -1333,17 +1927,22 @@ afr_inode_refresh_subvol_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
if (xdata)
local->replies[call_child].xdata = dict_ref(xdata);
}
+
if (xdata) {
ret = dict_get_int8(xdata, "link-count", &need_heal);
- local->replies[call_child].need_heal = need_heal;
- } else {
- local->replies[call_child].need_heal = need_heal;
+ if (ret) {
+ gf_msg_debug(this->name, -ret, "Unable to get link count");
+ }
}
+ local->replies[call_child].need_heal = need_heal;
call_count = afr_frame_return(frame);
if (call_count == 0) {
afr_set_need_heal(this, local);
ret = afr_inode_refresh_err(frame, this);
+ if (ret) {
+ gf_msg_debug(this->name, ret, "afr_inode_refresh_err failed");
+ }
afr_inode_refresh_done(frame, this, ret);
}
}
@@ -1452,12 +2051,12 @@ afr_inode_refresh_do(call_frame_t *frame, xlator_t *this)
return 0;
}
- ret = dict_set_str(xdata, "link-count", GF_XATTROP_INDEX_COUNT);
+ ret = dict_set_sizen_str_sizen(xdata, "link-count", GF_XATTROP_INDEX_COUNT);
if (ret) {
gf_msg_debug(this->name, -ret, "Unable to set link-count in dict ");
}
- ret = dict_set_str(xdata, GLUSTERFS_INODELK_DOM_COUNT, this->name);
+ ret = dict_set_str_sizen(xdata, GLUSTERFS_INODELK_DOM_COUNT, this->name);
if (ret) {
gf_msg_debug(this->name, -ret,
"Unable to set inodelk-dom-count in dict ");
@@ -1555,7 +2154,7 @@ afr_xattr_req_prepare(xlator_t *this, dict_t *xattr_req)
"query flag");
}
- ret = dict_set_int32(xattr_req, "list-xattr", 1);
+ ret = dict_set_int32_sizen(xattr_req, "list-xattr", 1);
if (ret) {
gf_msg_debug(this->name, -ret, "Unable to set list-xattr in dict ");
}
@@ -1600,7 +2199,8 @@ afr_lookup_xattr_req_prepare(afr_local_t *local, xlator_t *this,
GLUSTERFS_PARENT_ENTRYLK);
}
- ret = dict_set_str(local->xattr_req, "link-count", GF_XATTROP_INDEX_COUNT);
+ ret = dict_set_sizen_str_sizen(local->xattr_req, "link-count",
+ GF_XATTROP_INDEX_COUNT);
if (ret) {
gf_msg_debug(this->name, -ret, "Unable to set link-count in dict ");
}
@@ -1611,19 +2211,18 @@ out:
}
int
-afr_least_pending_reads_child(afr_private_t *priv)
+afr_least_pending_reads_child(afr_private_t *priv, unsigned char *readable)
{
int i = 0;
- int child = 0;
+ int child = -1;
int64_t read_iter = -1;
int64_t pending_read = -1;
- pending_read = GF_ATOMIC_GET(priv->pending_reads[0]);
- for (i = 1; i < priv->child_count; i++) {
- if (AFR_IS_ARBITER_BRICK(priv, i))
+ for (i = 0; i < priv->child_count; i++) {
+ if (AFR_IS_ARBITER_BRICK(priv, i) || !readable[i])
continue;
read_iter = GF_ATOMIC_GET(priv->pending_reads[i]);
- if (read_iter < pending_read) {
+ if (child == -1 || read_iter < pending_read) {
pending_read = read_iter;
child = i;
}
@@ -1632,8 +2231,54 @@ afr_least_pending_reads_child(afr_private_t *priv)
return child;
}
+static int32_t
+afr_least_latency_child(afr_private_t *priv, unsigned char *readable)
+{
+ int32_t i = 0;
+ int child = -1;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (AFR_IS_ARBITER_BRICK(priv, i) || !readable[i] ||
+ priv->child_latency[i] < 0)
+ continue;
+
+ if (child == -1 ||
+ priv->child_latency[i] < priv->child_latency[child]) {
+ child = i;
+ }
+ }
+ return child;
+}
+
+static int32_t
+afr_least_latency_times_pending_reads_child(afr_private_t *priv,
+ unsigned char *readable)
+{
+ int32_t i = 0;
+ int child = -1;
+ int64_t pending_read = 0;
+ int64_t latency = -1;
+ int64_t least_latency = -1;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (AFR_IS_ARBITER_BRICK(priv, i) || !readable[i] ||
+ priv->child_latency[i] < 0)
+ continue;
+
+ pending_read = GF_ATOMIC_GET(priv->pending_reads[i]);
+ latency = (pending_read + 1) * priv->child_latency[i];
+
+ if (child == -1 || latency < least_latency) {
+ least_latency = latency;
+ child = i;
+ }
+ }
+ return child;
+}
+
int
-afr_hash_child(afr_read_subvol_args_t *args, afr_private_t *priv)
+afr_hash_child(afr_read_subvol_args_t *args, afr_private_t *priv,
+ unsigned char *readable)
{
uuid_t gfid_copy = {
0,
@@ -1642,14 +2287,14 @@ afr_hash_child(afr_read_subvol_args_t *args, afr_private_t *priv)
int child = -1;
switch (priv->hash_mode) {
- case 0:
+ case AFR_READ_POLICY_FIRST_UP:
break;
- case 1:
+ case AFR_READ_POLICY_GFID_HASH:
gf_uuid_copy(gfid_copy, args->gfid);
child = SuperFastHash((char *)gfid_copy, sizeof(gfid_copy)) %
priv->child_count;
break;
- case 2:
+ case AFR_READ_POLICY_GFID_PID_HASH:
if (args->ia_type != IA_IFDIR) {
/*
* Why getpid? Because it's one of the cheapest calls
@@ -1661,14 +2306,21 @@ afr_hash_child(afr_read_subvol_args_t *args, afr_private_t *priv)
* need is a low probability that multiple clients
* won't converge on the same subvolume.
*/
+ gf_uuid_copy(gfid_copy, args->gfid);
pid = getpid();
- memcpy(gfid_copy, &pid, sizeof(pid));
+ *(pid_t *)gfid_copy ^= pid;
}
child = SuperFastHash((char *)gfid_copy, sizeof(gfid_copy)) %
priv->child_count;
break;
- case 3:
- child = afr_least_pending_reads_child(priv);
+ case AFR_READ_POLICY_LESS_LOAD:
+ child = afr_least_pending_reads_child(priv, readable);
+ break;
+ case AFR_READ_POLICY_LEAST_LATENCY:
+ child = afr_least_latency_child(priv, readable);
+ break;
+ case AFR_READ_POLICY_LOAD_LATENCY_HYBRID:
+ child = afr_least_latency_times_pending_reads_child(priv, readable);
break;
}
@@ -1701,7 +2353,7 @@ afr_read_subvol_select_by_policy(inode_t *inode, xlator_t *this,
}
/* second preference - use hashed mode */
- read_subvol = afr_hash_child(&local_args, priv);
+ read_subvol = afr_hash_child(&local_args, priv, readable);
if (read_subvol >= 0 && readable[read_subvol])
return read_subvol;
@@ -1796,11 +2448,9 @@ afr_local_transaction_cleanup(afr_local_t *local, xlator_t *this)
afr_matrix_cleanup(local->pending, priv->child_count);
- GF_FREE(local->internal_lock.locked_nodes);
-
GF_FREE(local->internal_lock.lower_locked_nodes);
- afr_entry_lockee_cleanup(&local->internal_lock);
+ afr_lockees_cleanup(&local->internal_lock);
GF_FREE(local->transaction.pre_op);
@@ -2009,6 +2659,9 @@ afr_local_cleanup(afr_local_t *local, xlator_t *this)
{ /* lk */
GF_FREE(local->cont.lk.locked_nodes);
+ GF_FREE(local->cont.lk.dom_locked_nodes);
+ GF_FREE(local->cont.lk.dom_lock_op_ret);
+ GF_FREE(local->cont.lk.dom_lock_op_errno);
}
{ /* create */
@@ -2239,7 +2892,7 @@ afr_attempt_readsubvol_set(call_frame_t *frame, xlator_t *this,
{
afr_private_t *priv = NULL;
afr_local_t *local = NULL;
- int spb_choice = -1;
+ int spb_subvol = -1;
int child_count = -1;
if (*read_subvol != -1)
@@ -2249,13 +2902,15 @@ afr_attempt_readsubvol_set(call_frame_t *frame, xlator_t *this,
local = frame->local;
child_count = priv->child_count;
- afr_inode_split_brain_choice_get(local->inode, this, &spb_choice);
- if ((spb_choice >= 0) &&
+ afr_split_brain_read_subvol_get(local->inode, this, frame, &spb_subvol);
+ if ((spb_subvol >= 0) &&
(AFR_COUNT(success_replies, child_count) == child_count)) {
- *read_subvol = spb_choice;
- } else if (!priv->quorum_count) {
+ *read_subvol = spb_subvol;
+ } else if (!priv->quorum_count ||
+ frame->root->pid == GF_CLIENT_PID_GLFS_HEAL) {
*read_subvol = afr_first_up_child(frame, this);
- } else if (priv->quorum_count && afr_has_quorum(data_readable, this)) {
+ } else if (priv->quorum_count &&
+ afr_has_quorum(data_readable, this, NULL)) {
/* read_subvol is guaranteed to be valid if we hit this path. */
*read_subvol = afr_first_up_child(frame, this);
} else {
@@ -2270,7 +2925,7 @@ afr_attempt_readsubvol_set(call_frame_t *frame, xlator_t *this,
local->loc.path);
}
if (*read_subvol >= 0)
- dict_del(local->replies[*read_subvol].xdata, GF_CONTENT_KEY);
+ dict_del_sizen(local->replies[*read_subvol].xdata, GF_CONTENT_KEY);
}
static void
@@ -2291,6 +2946,7 @@ afr_lookup_done(call_frame_t *frame, xlator_t *this)
0,
};
gf_boolean_t locked_entry = _gf_false;
+ gf_boolean_t in_flight_create = _gf_false;
gf_boolean_t can_interpret = _gf_true;
inode_t *parent = NULL;
ia_type_t ia_type = IA_INVAL;
@@ -2334,17 +2990,12 @@ afr_lookup_done(call_frame_t *frame, xlator_t *this)
if (!replies[i].valid)
continue;
- if (locked_entry && replies[i].op_ret == -1 &&
- replies[i].op_errno == ENOENT) {
- /* Second, check entry is still
- "underway" in creation */
- local->op_ret = -1;
- local->op_errno = ENOENT;
- goto error;
- }
-
- if (replies[i].op_ret == -1)
+ if (replies[i].op_ret == -1) {
+ if (locked_entry && replies[i].op_errno == ENOENT) {
+ in_flight_create = _gf_true;
+ }
continue;
+ }
if (read_subvol == -1 || !readable[read_subvol]) {
read_subvol = i;
@@ -2354,6 +3005,12 @@ afr_lookup_done(call_frame_t *frame, xlator_t *this)
}
}
+ if (in_flight_create && !afr_has_quorum(success_replies, this, NULL)) {
+ local->op_ret = -1;
+ local->op_errno = ENOENT;
+ goto error;
+ }
+
if (read_subvol == -1)
goto error;
/* We now have a read_subvol, which is readable[] (if there
@@ -2382,7 +3039,8 @@ afr_lookup_done(call_frame_t *frame, xlator_t *this)
/* If we were called from glfsheal and there is still a gfid
* mismatch, succeed the lookup and let glfsheal print the
* response via gfid-heal-msg.*/
- if (!dict_get_str(local->xattr_req, "gfid-heal-msg", &gfid_heal_msg))
+ if (!dict_get_str_sizen(local->xattr_req, "gfid-heal-msg",
+ &gfid_heal_msg))
goto cant_interpret;
/* LOG ERROR */
@@ -2397,7 +3055,7 @@ afr_lookup_done(call_frame_t *frame, xlator_t *this)
read_subvol = -1;
memset(readable, 0, sizeof(*readable) * priv->child_count);
if (can_interpret) {
- if (!afr_has_quorum(success_replies, this))
+ if (!afr_has_quorum(success_replies, this, NULL))
goto cant_interpret;
/* It is safe to call afr_replies_interpret() because we have
a response from all the UP subvolumes and all of them resolved
@@ -2411,8 +3069,8 @@ afr_lookup_done(call_frame_t *frame, xlator_t *this)
if (read_subvol == -1)
goto cant_interpret;
if (ret) {
- afr_inode_event_gen_reset(local->inode, this);
- dict_del(local->replies[read_subvol].xdata, GF_CONTENT_KEY);
+ afr_inode_need_refresh_set(local->inode, this);
+ dict_del_sizen(local->replies[read_subvol].xdata, GF_CONTENT_KEY);
}
} else {
cant_interpret:
@@ -2436,10 +3094,10 @@ afr_lookup_done(call_frame_t *frame, xlator_t *this)
goto error;
}
- ret = dict_get_str(local->xattr_req, "gfid-heal-msg", &gfid_heal_msg);
+ ret = dict_get_str_sizen(local->xattr_req, "gfid-heal-msg", &gfid_heal_msg);
if (!ret) {
- ret = dict_set_str(local->replies[read_subvol].xdata, "gfid-heal-msg",
- gfid_heal_msg);
+ ret = dict_set_str_sizen(local->replies[read_subvol].xdata,
+ "gfid-heal-msg", gfid_heal_msg);
if (ret) {
gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_DICT_SET_FAILED,
"Error setting gfid-heal-msg dict");
@@ -2464,7 +3122,7 @@ error:
* others in that they must be given higher priority while
* returning to the user.
*
- * The hierarchy is ENODATA > ENOENT > ESTALE > others
+ * The hierarchy is ENODATA > ENOENT > ESTALE > ENOSPC > others
*/
int
@@ -2476,6 +3134,8 @@ afr_higher_errno(int32_t old_errno, int32_t new_errno)
return ENOENT;
if (old_errno == ESTALE || new_errno == ESTALE)
return ESTALE;
+ if (old_errno == ENOSPC || new_errno == ENOSPC)
+ return ENOSPC;
return new_errno;
}
@@ -2517,7 +3177,7 @@ afr_local_discovery_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
priv = this->private;
child_index = (int32_t)(long)cookie;
- ret = dict_get_str(dict, GF_XATTR_PATHINFO_KEY, &pathinfo);
+ ret = dict_get_str_sizen(dict, GF_XATTR_PATHINFO_KEY, &pathinfo);
if (ret != 0) {
goto out;
}
@@ -2605,7 +3265,11 @@ afr_lookup_sh_metadata_wrap(void *opaque)
dict = dict_new();
if (!dict)
goto out;
- ret = dict_set_str(dict, "link-count", GF_XATTROP_INDEX_COUNT);
+ if (local->xattr_req) {
+ dict_copy(local->xattr_req, dict);
+ }
+
+ ret = dict_set_sizen_str_sizen(dict, "link-count", GF_XATTROP_INDEX_COUNT);
if (ret) {
gf_msg_debug(this->name, -ret, "Unable to set link-count in dict ");
}
@@ -2613,7 +3277,7 @@ afr_lookup_sh_metadata_wrap(void *opaque)
if (loc_is_nameless(&local->loc)) {
ret = afr_selfheal_unlocked_discover_on(frame, local->inode,
local->loc.gfid, local->replies,
- local->child_up);
+ local->child_up, dict);
} else {
inode = afr_selfheal_unlocked_lookup_on(frame, local->loc.parent,
local->loc.name, local->replies,
@@ -2787,7 +3451,7 @@ afr_lookup_selfheal_wrap(void *opaque)
inode = afr_selfheal_unlocked_lookup_on(frame, local->loc.parent,
local->loc.name, local->replies,
- local->child_up, NULL);
+ local->child_up, local->xattr_req);
if (inode)
inode_unref(inode);
@@ -2874,7 +3538,7 @@ afr_lookup_entry_heal(call_frame_t *frame, xlator_t *this)
if (name_state_mismatch) {
if (!priv->quorum_count)
goto name_heal;
- if (!afr_has_quorum(success, this))
+ if (!afr_has_quorum(success, this, NULL))
goto name_heal;
if (op_errno)
goto name_heal;
@@ -2932,7 +3596,7 @@ afr_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
* with ESTALE so that a fresh lookup will be sent by the top xlator.
* So remember it.
*/
- if (xdata && dict_get(xdata, "gfid-changed"))
+ if (xdata && dict_get_sizen(xdata, "gfid-changed"))
local->cont.lookup.needs_fresh_lookup = _gf_true;
if (xdata) {
@@ -2962,8 +3626,8 @@ afr_discover_unwind(call_frame_t *frame, xlator_t *this)
{
afr_private_t *priv = NULL;
afr_local_t *local = NULL;
- int op_errno = 0;
int read_subvol = -1;
+ int ret = 0;
unsigned char *data_readable = NULL;
unsigned char *success_replies = NULL;
@@ -2976,17 +3640,19 @@ afr_discover_unwind(call_frame_t *frame, xlator_t *this)
if (AFR_COUNT(success_replies, priv->child_count) > 0)
local->op_ret = 0;
- op_errno = afr_final_errno(frame->local, this->private);
-
if (local->op_ret < 0) {
- AFR_STACK_UNWIND(lookup, frame, -1, op_errno, NULL, NULL, NULL, NULL);
- return;
+ local->op_ret = -1;
+ local->op_errno = afr_final_errno(frame->local, this->private);
+ goto error;
}
- if (!afr_has_quorum(success_replies, this))
+ if (!afr_has_quorum(success_replies, this, frame))
goto unwind;
- afr_replies_interpret(frame, this, local->inode, NULL);
+ ret = afr_replies_interpret(frame, this, local->inode, NULL);
+ if (ret) {
+ afr_inode_need_refresh_set(local->inode, this);
+ }
read_subvol = afr_read_subvol_decide(local->inode, this, NULL,
data_readable);
@@ -2994,11 +3660,8 @@ afr_discover_unwind(call_frame_t *frame, xlator_t *this)
unwind:
afr_attempt_readsubvol_set(frame, this, success_replies, data_readable,
&read_subvol);
- if (read_subvol == -1) {
- AFR_STACK_UNWIND(lookup, frame, local->op_ret, local->op_errno, NULL,
- NULL, NULL, NULL);
- return;
- }
+ if (read_subvol == -1)
+ goto error;
if (AFR_IS_ARBITER_BRICK(priv, read_subvol) && local->op_ret == 0) {
local->op_ret = -1;
@@ -3013,6 +3676,11 @@ unwind:
local->inode, &local->replies[read_subvol].poststat,
local->replies[read_subvol].xdata,
&local->replies[read_subvol].postparent);
+ return;
+
+error:
+ AFR_STACK_UNWIND(lookup, frame, local->op_ret, local->op_errno, NULL, NULL,
+ NULL, NULL);
}
static int
@@ -3036,7 +3704,7 @@ afr_ta_id_file_check(void *opaque)
this = opaque;
priv = this->private;
- ret = afr_fill_ta_loc(this, &loc);
+ ret = afr_fill_ta_loc(this, &loc, _gf_false);
if (ret) {
gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_THIN_ARB,
"Failed to populate thin-arbiter loc for: %s.", loc.name);
@@ -3209,8 +3877,6 @@ afr_discover(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr_req)
}
if (__is_root_gfid(loc->inode->gfid)) {
- if (!this->itable)
- this->itable = loc->inode->table;
if (!priv->root_inode)
priv->root_inode = inode_ref(loc->inode);
@@ -3229,10 +3895,15 @@ afr_discover(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr_req)
local->inode = inode_ref(loc->inode);
- if (xattr_req)
+ if (xattr_req) {
/* If xattr_req was null, afr_lookup_xattr_req_prepare() will
allocate one for us */
- local->xattr_req = dict_ref(xattr_req);
+ local->xattr_req = dict_copy_with_ref(xattr_req, NULL);
+ if (!local->xattr_req) {
+ op_errno = ENOMEM;
+ goto out;
+ }
+ }
if (gf_uuid_is_null(loc->inode->gfid)) {
afr_discover_do(frame, this, 0);
@@ -3242,11 +3913,7 @@ afr_discover(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr_req)
afr_read_subvol_get(loc->inode, this, NULL, NULL, &event,
AFR_DATA_TRANSACTION, NULL);
- if (afr_is_inode_refresh_reqd(loc->inode, this, event,
- local->event_generation))
- afr_inode_refresh(frame, this, loc->inode, NULL, afr_discover_do);
- else
- afr_discover_do(frame, this, 0);
+ afr_discover_do(frame, this, 0);
return 0;
out:
@@ -3268,7 +3935,6 @@ afr_lookup_do(call_frame_t *frame, xlator_t *this, int err)
if (err < 0) {
local->op_errno = err;
- ret = -1;
goto out;
}
@@ -3279,7 +3945,6 @@ afr_lookup_do(call_frame_t *frame, xlator_t *this, int err)
&local->loc);
if (ret) {
local->op_errno = -ret;
- ret = -1;
goto out;
}
@@ -3344,16 +4009,15 @@ afr_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr_req)
if (loc_is_nameless(loc)) {
if (xattr_req)
- dict_del(xattr_req, "gfid-req");
+ dict_del_sizen(xattr_req, "gfid-req");
afr_discover(frame, this, loc, xattr_req);
return 0;
}
- if (__is_root_gfid(loc->parent->gfid)) {
- if (!strcmp(loc->name, GF_REPLICATE_TRASH_DIR)) {
- op_errno = EPERM;
- goto out;
- }
+ if (afr_is_private_directory(this->private, loc->parent->gfid, loc->name,
+ frame->root->pid)) {
+ op_errno = EPERM;
+ goto out;
}
local = AFR_FRAME_INIT(frame, op_errno);
@@ -3382,18 +4046,14 @@ afr_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr_req)
ret = dict_get_gfuuid(local->xattr_req, "gfid-req",
&local->cont.lookup.gfid_req);
if (ret == 0) {
- dict_del(local->xattr_req, "gfid-req");
+ dict_del_sizen(local->xattr_req, "gfid-req");
}
}
afr_read_subvol_get(loc->parent, this, NULL, NULL, &event,
AFR_DATA_TRANSACTION, NULL);
- if (afr_is_inode_refresh_reqd(loc->inode, this, event,
- local->event_generation))
- afr_inode_refresh(frame, this, loc->parent, NULL, afr_lookup_do);
- else
- afr_lookup_do(frame, this, 0);
+ afr_lookup_do(frame, this, 0);
return 0;
out:
@@ -3403,8 +4063,26 @@ out:
}
+/*
+ * Free an afr fd context. If the context carries lk_heal_info, it is first
+ * removed from the list it is linked on (under priv->lock) and cleaned up;
+ * then opened_on and the context itself are freed.
+ */
void
-_afr_cleanup_fd_ctx(afr_fd_ctx_t *fd_ctx)
+_afr_cleanup_fd_ctx(xlator_t *this, afr_fd_ctx_t *fd_ctx)
{
+ afr_private_t *priv = this->private;
+
+ if (fd_ctx->lk_heal_info) {
+ LOCK(&priv->lock);
+ {
+ list_del(&fd_ctx->lk_heal_info->pos);
+ }
+ /* Release priv->lock once the entry is unlinked; without this the
+ * lock stays held after the function returns. */
+ UNLOCK(&priv->lock);
+ afr_lk_heal_info_cleanup(fd_ctx->lk_heal_info);
+ fd_ctx->lk_heal_info = NULL;
+ }
GF_FREE(fd_ctx->opened_on);
GF_FREE(fd_ctx);
return;
@@ -3424,7 +4094,7 @@ afr_cleanup_fd_ctx(xlator_t *this, fd_t *fd)
fd_ctx = (afr_fd_ctx_t *)(long)ctx;
if (fd_ctx) {
- _afr_cleanup_fd_ctx(fd_ctx);
+ _afr_cleanup_fd_ctx(this, fd_ctx);
}
out:
@@ -3517,13 +4187,14 @@ __afr_fd_ctx_set(xlator_t *this, fd_t *fd)
}
fd_ctx->readdir_subvol = -1;
+ fd_ctx->lk_heal_info = NULL;
ret = __fd_ctx_set(fd, this, (uint64_t)(long)fd_ctx);
if (ret)
gf_msg_debug(this->name, 0, "failed to set fd ctx (%p)", fd);
out:
if (ret && fd_ctx)
- _afr_cleanup_fd_ctx(fd_ctx);
+ _afr_cleanup_fd_ctx(this, fd_ctx);
return ret;
}
@@ -3547,11 +4218,10 @@ afr_flush_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
} else {
local->op_errno = op_errno;
}
+ call_count = --local->call_count;
}
UNLOCK(&frame->lock);
- call_count = afr_frame_return(frame);
-
if (call_count == 0)
AFR_STACK_UNWIND(flush, frame, local->op_ret, local->op_errno,
local->xdata_rsp);
@@ -3647,6 +4317,7 @@ afr_flush(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
call_stub_t *stub = NULL;
int op_errno = ENOMEM;
+ AFR_ERROR_OUT_IF_FDCTX_INVALID(fd, this, op_errno, out);
local = AFR_FRAME_INIT(frame, op_errno);
if (!local)
goto out;
@@ -3687,11 +4358,10 @@ afr_fsyncdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
} else {
local->op_errno = op_errno;
}
+ call_count = --local->call_count;
}
UNLOCK(&frame->lock);
- call_count = afr_frame_return(frame);
-
if (call_count == 0)
AFR_STACK_UNWIND(fsyncdir, frame, local->op_ret, local->op_errno,
local->xdata_rsp);
@@ -4008,7 +4678,7 @@ afr_fop_lock_done(call_frame_t *frame, xlator_t *this)
if (afr_is_conflicting_lock_present(local->op_ret, local->op_errno)) {
afr_unlock_locks_and_proceed(frame, this, lock_count);
- } else if (priv->quorum_count && !afr_has_quorum(success, this)) {
+ } else if (priv->quorum_count && !afr_has_quorum(success, this, NULL)) {
local->fop_lock_state = AFR_FOP_LOCK_QUORUM_FAILED;
local->op_ret = -1;
local->op_errno = afr_final_errno(local, priv);
@@ -4184,9 +4854,9 @@ out:
}
static int32_t
-afr_handle_inodelk(call_frame_t *frame, glusterfs_fop_t fop, const char *volume,
- loc_t *loc, fd_t *fd, int32_t cmd, struct gf_flock *flock,
- dict_t *xdata)
+afr_handle_inodelk(call_frame_t *frame, xlator_t *this, glusterfs_fop_t fop,
+ const char *volume, loc_t *loc, fd_t *fd, int32_t cmd,
+ struct gf_flock *flock, dict_t *xdata)
{
afr_local_t *local = NULL;
int32_t op_errno = ENOMEM;
@@ -4198,8 +4868,10 @@ afr_handle_inodelk(call_frame_t *frame, glusterfs_fop_t fop, const char *volume,
local->op = fop;
if (loc)
loc_copy(&local->loc, loc);
- if (fd)
+ if (fd && (flock->l_type != F_UNLCK)) {
+ AFR_ERROR_OUT_IF_FDCTX_INVALID(fd, this, op_errno, out);
local->fd = fd_ref(fd);
+ }
local->cont.inodelk.volume = gf_strdup(volume);
if (!local->cont.inodelk.volume) {
@@ -4228,8 +4900,8 @@ int32_t
afr_inodelk(call_frame_t *frame, xlator_t *this, const char *volume, loc_t *loc,
int32_t cmd, struct gf_flock *flock, dict_t *xdata)
{
- afr_handle_inodelk(frame, GF_FOP_INODELK, volume, loc, NULL, cmd, flock,
- xdata);
+ afr_handle_inodelk(frame, this, GF_FOP_INODELK, volume, loc, NULL, cmd,
+ flock, xdata);
return 0;
}
@@ -4237,15 +4909,16 @@ int32_t
afr_finodelk(call_frame_t *frame, xlator_t *this, const char *volume, fd_t *fd,
int32_t cmd, struct gf_flock *flock, dict_t *xdata)
{
- afr_handle_inodelk(frame, GF_FOP_FINODELK, volume, NULL, fd, cmd, flock,
- xdata);
+ afr_handle_inodelk(frame, this, GF_FOP_FINODELK, volume, NULL, fd, cmd,
+ flock, xdata);
return 0;
}
static int
-afr_handle_entrylk(call_frame_t *frame, glusterfs_fop_t fop, const char *volume,
- loc_t *loc, fd_t *fd, const char *basename, entrylk_cmd cmd,
- entrylk_type type, dict_t *xdata)
+afr_handle_entrylk(call_frame_t *frame, xlator_t *this, glusterfs_fop_t fop,
+ const char *volume, loc_t *loc, fd_t *fd,
+ const char *basename, entrylk_cmd cmd, entrylk_type type,
+ dict_t *xdata)
{
afr_local_t *local = NULL;
int32_t op_errno = ENOMEM;
@@ -4257,8 +4930,10 @@ afr_handle_entrylk(call_frame_t *frame, glusterfs_fop_t fop, const char *volume,
local->op = fop;
if (loc)
loc_copy(&local->loc, loc);
- if (fd)
+ if (fd && (cmd != ENTRYLK_UNLOCK)) {
+ AFR_ERROR_OUT_IF_FDCTX_INVALID(fd, this, op_errno, out);
local->fd = fd_ref(fd);
+ }
local->cont.entrylk.cmd = cmd;
local->cont.entrylk.in_cmd = cmd;
local->cont.entrylk.type = type;
@@ -4285,8 +4960,8 @@ afr_entrylk(call_frame_t *frame, xlator_t *this, const char *volume, loc_t *loc,
const char *basename, entrylk_cmd cmd, entrylk_type type,
dict_t *xdata)
{
- afr_handle_entrylk(frame, GF_FOP_ENTRYLK, volume, loc, NULL, basename, cmd,
- type, xdata);
+ afr_handle_entrylk(frame, this, GF_FOP_ENTRYLK, volume, loc, NULL, basename,
+ cmd, type, xdata);
return 0;
}
@@ -4295,8 +4970,8 @@ afr_fentrylk(call_frame_t *frame, xlator_t *this, const char *volume, fd_t *fd,
const char *basename, entrylk_cmd cmd, entrylk_type type,
dict_t *xdata)
{
- afr_handle_entrylk(frame, GF_FOP_FENTRYLK, volume, NULL, fd, basename, cmd,
- type, xdata);
+ afr_handle_entrylk(frame, this, GF_FOP_FENTRYLK, volume, NULL, fd, basename,
+ cmd, type, xdata);
return 0;
}
@@ -4308,10 +4983,10 @@ afr_statfs_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
int call_count = 0;
struct statvfs *buf = NULL;
+ local = frame->local;
+
LOCK(&frame->lock);
{
- local = frame->local;
-
if (op_ret != 0) {
local->op_errno = op_errno;
goto unlock;
@@ -4337,10 +5012,9 @@ afr_statfs_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
}
}
unlock:
+ call_count = --local->call_count;
UNLOCK(&frame->lock);
- call_count = afr_frame_return(frame);
-
if (call_count == 0)
AFR_STACK_UNWIND(statfs, frame, local->op_ret, local->op_errno,
&local->cont.statfs.buf, local->xdata_rsp);
@@ -4415,9 +5089,10 @@ afr_lk_unlock_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
}
call_count = afr_frame_return(frame);
- if (call_count == 0)
+ if (call_count == 0) {
AFR_STACK_UNWIND(lk, frame, local->op_ret, local->op_errno, NULL,
local->xdata_rsp);
+ }
return 0;
}
@@ -4499,7 +5174,7 @@ afr_lk_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
local->cont.lk.cmd, &local->cont.lk.user_flock,
local->xdata_req);
} else if (priv->quorum_count &&
- !afr_has_quorum(local->cont.lk.locked_nodes, this)) {
+ !afr_has_quorum(local->cont.lk.locked_nodes, this, NULL)) {
local->op_ret = -1;
local->op_errno = afr_final_errno(local, priv);
@@ -4516,11 +5191,133 @@ afr_lk_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
}
int
+afr_lk_transaction_cbk(int ret, call_frame_t *frame, void *opaque)
+{
+ return 0;
+}
+
+int
+afr_lk_txn_wind_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct gf_flock *lock,
+ dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ int child_index = -1;
+
+ local = frame->local;
+ child_index = (long)cookie;
+ afr_common_lock_cbk(frame, cookie, this, op_ret, op_errno, xdata);
+ if (op_ret == 0) {
+ local->op_ret = 0;
+ local->op_errno = 0;
+ local->cont.lk.locked_nodes[child_index] = 1;
+ local->cont.lk.ret_flock = *lock;
+ }
+ syncbarrier_wake(&local->barrier);
+ return 0;
+}
+
+int
+afr_lk_txn_unlock_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct gf_flock *lock,
+ dict_t *xdata)
+{
+ afr_local_t *local = frame->local;
+ afr_private_t *priv = this->private;
+ int child_index = (long)cookie;
+
+ if (op_ret < 0 && op_errno != ENOTCONN && op_errno != EBADFD) {
+ gf_msg(this->name, GF_LOG_ERROR, op_errno, AFR_MSG_UNLOCK_FAIL,
+ "gfid=%s: unlock failed on subvolume %s "
+ "with lock owner %s",
+ uuid_utoa(local->fd->inode->gfid),
+ priv->children[child_index]->name,
+ lkowner_utoa(&frame->root->lk_owner));
+ }
+ return 0;
+}
+int
+afr_lk_transaction(void *opaque)
+{
+ call_frame_t *frame = NULL;
+ xlator_t *this = NULL;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ char *wind_on = NULL;
+ int op_errno = 0;
+ int i = 0;
+ int ret = 0;
+
+ frame = (call_frame_t *)opaque;
+ local = frame->local;
+ this = frame->this;
+ priv = this->private;
+ wind_on = alloca0(priv->child_count);
+
+ if (priv->arbiter_count || priv->child_count != 3) {
+ op_errno = ENOTSUP;
+ gf_msg(frame->this->name, GF_LOG_ERROR, op_errno, AFR_MSG_LK_HEAL_DOM,
+ "%s: Lock healing supported only for replica 3 volumes.",
+ uuid_utoa(local->fd->inode->gfid));
+ goto err;
+ }
+
+ op_errno = -afr_dom_lock_acquire(frame); // Released during
+ // AFR_STACK_UNWIND
+ if (op_errno != 0) {
+ goto err;
+ }
+ if (priv->quorum_count &&
+ !afr_has_quorum(local->cont.lk.dom_locked_nodes, this, NULL)) {
+ op_errno = afr_final_errno(local, priv);
+ goto err;
+ }
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (priv->child_up[i] && local->cont.lk.dom_locked_nodes[i])
+ wind_on[i] = 1;
+ }
+ AFR_ONLIST(wind_on, frame, afr_lk_txn_wind_cbk, lk, local->fd,
+ local->cont.lk.cmd, &local->cont.lk.user_flock,
+ local->xdata_req);
+
+ if (priv->quorum_count &&
+ !afr_has_quorum(local->cont.lk.locked_nodes, this, NULL)) {
+ local->op_ret = -1;
+ local->op_errno = afr_final_errno(local, priv);
+ goto unlock;
+ } else {
+ if (local->cont.lk.user_flock.l_type == F_UNLCK)
+ ret = afr_remove_lock_from_saved_locks(local, this);
+ else
+ ret = afr_add_lock_to_saved_locks(frame, this);
+ if (ret) {
+ local->op_ret = -1;
+ local->op_errno = -ret;
+ goto unlock;
+ }
+ AFR_STACK_UNWIND(lk, frame, local->op_ret, local->op_errno,
+ &local->cont.lk.ret_flock, local->xdata_rsp);
+ }
+
+ return 0;
+
+unlock:
+ local->cont.lk.user_flock.l_type = F_UNLCK;
+ AFR_ONLIST(local->cont.lk.locked_nodes, frame, afr_lk_txn_unlock_cbk, lk,
+ local->fd, F_SETLK, &local->cont.lk.user_flock, NULL);
+err:
+ AFR_STACK_UNWIND(lk, frame, -1, op_errno, NULL, NULL);
+ return -1;
+}
+
+int
afr_lk(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd,
struct gf_flock *flock, dict_t *xdata)
{
afr_private_t *priv = NULL;
afr_local_t *local = NULL;
+ int ret = 0;
int i = 0;
int32_t op_errno = ENOMEM;
@@ -4531,9 +5328,11 @@ afr_lk(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd,
goto out;
local->op = GF_FOP_LK;
- if (!afr_lk_is_unlock(cmd, flock) &&
- !afr_is_consistent_io_possible(local, priv, &op_errno))
- goto out;
+ if (!afr_lk_is_unlock(cmd, flock)) {
+ AFR_ERROR_OUT_IF_FDCTX_INVALID(fd, this, op_errno, out);
+ if (!afr_is_consistent_io_possible(local, priv, &op_errno))
+ goto out;
+ }
local->cont.lk.locked_nodes = GF_CALLOC(
priv->child_count, sizeof(*local->cont.lk.locked_nodes),
@@ -4551,6 +5350,16 @@ afr_lk(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd,
if (xdata)
local->xdata_req = dict_ref(xdata);
+ if (afr_is_lock_mode_mandatory(xdata)) {
+ ret = synctask_new(this->ctx->env, afr_lk_transaction,
+ afr_lk_transaction_cbk, frame, frame);
+ if (ret) {
+ op_errno = ENOMEM;
+ goto out;
+ }
+ return 0;
+ }
+
STACK_WIND_COOKIE(frame, afr_lk_cbk, (void *)(long)0, priv->children[i],
priv->children[i]->fops->lk, fd, cmd, flock,
local->xdata_req);
@@ -4654,7 +5463,7 @@ afr_lease_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
priv->children[child_index]->fops->lease, &local->loc,
&local->cont.lease.user_lease, xdata);
} else if (priv->quorum_count &&
- !afr_has_quorum(local->cont.lease.locked_nodes, this)) {
+ !afr_has_quorum(local->cont.lease.locked_nodes, this, NULL)) {
local->op_ret = -1;
local->op_errno = afr_final_errno(local, priv);
@@ -4872,8 +5681,10 @@ afr_priv_dump(xlator_t *this)
GF_ATOMIC_GET(priv->pending_reads[i]));
sprintf(key, "child_latency[%d]", i);
gf_proc_dump_write(key, "%" PRId64, priv->child_latency[i]);
+ sprintf(key, "halo_child_up[%d]", i);
+ gf_proc_dump_write(key, "%d", priv->halo_child_up[i]);
}
- gf_proc_dump_write("data_self_heal", "%s", priv->data_self_heal);
+ gf_proc_dump_write("data_self_heal", "%d", priv->data_self_heal);
gf_proc_dump_write("metadata_self_heal", "%d", priv->metadata_self_heal);
gf_proc_dump_write("entry_self_heal", "%d", priv->entry_self_heal);
gf_proc_dump_write("read_child", "%d", priv->read_child);
@@ -4884,6 +5695,7 @@ afr_priv_dump(xlator_t *this)
priv->background_self_heal_count);
gf_proc_dump_write("healers", "%d", priv->healers);
gf_proc_dump_write("read-hash-mode", "%d", priv->hash_mode);
+ gf_proc_dump_write("use-anonymous-inode", "%d", priv->use_anon_inode);
if (priv->quorum_count == AFR_QUORUM_AUTO) {
gf_proc_dump_write("quorum-type", "auto");
} else if (priv->quorum_count == 0) {
@@ -4892,7 +5704,7 @@ afr_priv_dump(xlator_t *this)
gf_proc_dump_write("quorum-type", "fixed");
gf_proc_dump_write("quorum-count", "%d", priv->quorum_count);
}
- gf_proc_dump_write("up", "%u", afr_has_quorum(priv->child_up, this));
+ gf_proc_dump_write("up", "%u", afr_has_quorum(priv->child_up, this, NULL));
if (priv->thin_arbiter_count) {
gf_proc_dump_write("ta_child_up", "%d", priv->ta_child_up);
gf_proc_dump_write("ta_bad_child_index", "%d",
@@ -4944,13 +5756,31 @@ __afr_get_up_children_count(afr_private_t *priv)
return up_children;
}
+static int
+__get_heard_from_all_status(xlator_t *this)
+{
+ afr_private_t *priv = this->private;
+ int i;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!priv->last_event[i]) {
+ return 0;
+ }
+ }
+ if (priv->thin_arbiter_count && !priv->ta_child_up) {
+ return 0;
+ }
+ return 1;
+}
+
glusterfs_event_t
-__afr_transform_event_from_state(afr_private_t *priv)
+__afr_transform_event_from_state(xlator_t *this)
{
int i = 0;
int up_children = 0;
+ afr_private_t *priv = this->private;
- if (AFR_COUNT(priv->last_event, priv->child_count) == priv->child_count)
+ if (__get_heard_from_all_status(this))
/* have_heard_from_all. Let afr_notify() do the propagation. */
return GF_EVENT_MAXVAL;
@@ -4992,7 +5822,7 @@ afr_notify_cbk(void *data)
goto unlock;
}
priv->timer = NULL;
- event = __afr_transform_event_from_state(priv);
+ event = __afr_transform_event_from_state(this);
if (event != GF_EVENT_MAXVAL)
propagate = _gf_true;
}
@@ -5019,22 +5849,6 @@ __afr_launch_notify_timer(xlator_t *this, afr_private_t *priv)
}
}
-int
-__get_heard_from_all_status(xlator_t *this)
-{
- afr_private_t *priv = this->private;
- int heard_from_all = 1;
- int i = 0;
-
- for (i = 0; i < priv->child_count; i++) {
- if (!priv->last_event[i]) {
- heard_from_all = 0;
- break;
- }
- }
- return heard_from_all;
-}
-
static int
find_best_down_child(xlator_t *this)
{
@@ -5046,7 +5860,7 @@ find_best_down_child(xlator_t *this)
priv = this->private;
for (i = 0; i < priv->child_count; i++) {
- if (priv->child_up[i] && priv->child_latency[i] >= 0 &&
+ if (!priv->child_up[i] && priv->child_latency[i] >= 0 &&
priv->child_latency[i] < best_latency) {
best_child = i;
best_latency = priv->child_latency[i];
@@ -5118,7 +5932,9 @@ __afr_handle_ping_event(xlator_t *this, xlator_t *child_xlator, const int idx,
"), "
"marking child down.",
child_latency_msec, halo_max_latency_msec);
- *event = GF_EVENT_CHILD_DOWN;
+ if (priv->halo_child_up[idx]) {
+ *event = GF_EVENT_CHILD_DOWN;
+ }
}
} else if (child_latency_msec < halo_max_latency_msec &&
priv->child_up[idx] == 0) {
@@ -5130,7 +5946,9 @@ __afr_handle_ping_event(xlator_t *this, xlator_t *child_xlator, const int idx,
"), "
"marking child up.",
child_latency_msec, halo_max_latency_msec);
- *event = GF_EVENT_CHILD_UP;
+ if (priv->halo_child_up[idx]) {
+ *event = GF_EVENT_CHILD_UP;
+ }
} else {
gf_log(child_xlator->name, GF_LOG_INFO,
"Not marking child %d up, "
@@ -5192,9 +6010,15 @@ __afr_handle_child_up_event(xlator_t *this, xlator_t *child_xlator,
* want to set the child_latency to MAX to indicate
* the child needs ping data to be available before doing child-up
*/
- if (child_latency_msec < 0 && priv->halo_enabled) {
+ if (!priv->halo_enabled)
+ goto out;
+
+ if (child_latency_msec < 0) {
/*set to INT64_MAX-1 so that it is found for best_down_child*/
- priv->child_latency[idx] = AFR_HALO_MAX_LATENCY;
+ priv->halo_child_up[idx] = 1;
+ if (priv->child_latency[idx] < 0) {
+ priv->child_latency[idx] = AFR_HALO_MAX_LATENCY;
+ }
}
/*
@@ -5232,13 +6056,14 @@ __afr_handle_child_up_event(xlator_t *this, xlator_t *child_xlator,
"up_children (%d) > halo_max_replicas (%d)",
worst_up_child, up_children, priv->halo_max_replicas);
}
-
+out:
if (up_children == 1) {
gf_msg(this->name, GF_LOG_INFO, 0, AFR_MSG_SUBVOL_UP,
"Subvolume '%s' came back up; "
"going online.",
child_xlator->name);
- gf_event(EVENT_AFR_SUBVOL_UP, "subvol=%s", this->name);
+ gf_event(EVENT_AFR_SUBVOL_UP, "client-pid=%d; subvol=%s",
+ this->ctx->cmd_args.client_pid, this->name);
} else {
*event = GF_EVENT_SOME_DESCENDENT_UP;
}
@@ -5282,6 +6107,7 @@ __afr_handle_child_down_event(xlator_t *this, xlator_t *child_xlator, int idx,
*/
if (child_latency_msec < 0) {
priv->child_latency[idx] = child_latency_msec;
+ priv->halo_child_up[idx] = 0;
}
priv->child_up[idx] = 0;
@@ -5294,7 +6120,7 @@ __afr_handle_child_down_event(xlator_t *this, xlator_t *child_xlator, int idx,
* as we want it to be up to date if we are going to
* begin using it synchronously.
*/
- if (up_children < priv->halo_min_replicas) {
+ if (priv->halo_enabled && up_children < priv->halo_min_replicas) {
best_down_child = find_best_down_child(this);
if (best_down_child >= 0) {
gf_msg_debug(this->name, 0,
@@ -5306,7 +6132,6 @@ __afr_handle_child_down_event(xlator_t *this, xlator_t *child_xlator, int idx,
*up_child = best_down_child;
}
}
-
for (i = 0; i < priv->child_count; i++)
if (priv->child_up[i] == 0)
down_children++;
@@ -5315,7 +6140,8 @@ __afr_handle_child_down_event(xlator_t *this, xlator_t *child_xlator, int idx,
"All subvolumes are down. Going "
"offline until at least one of them "
"comes back up.");
- gf_event(EVENT_AFR_SUBVOLS_DOWN, "subvol=%s", this->name);
+ gf_event(EVENT_AFR_SUBVOLS_DOWN, "client-pid=%d; subvol=%s",
+ this->ctx->cmd_args.client_pid, this->name);
} else {
*event = GF_EVENT_SOME_DESCENDENT_DOWN;
}
@@ -5364,6 +6190,11 @@ afr_handle_inodelk_contention(xlator_t *this, struct gf_upcall *upcall)
}
LOCK(&priv->lock);
{
+ if (priv->release_ta_notify_dom_lock == _gf_true) {
+ /* Ignore multiple release requests from shds.*/
+ UNLOCK(&priv->lock);
+ return;
+ }
priv->release_ta_notify_dom_lock = _gf_true;
inmem_count = priv->ta_in_mem_txn_count;
onwire_count = priv->ta_on_wire_txn_count;
@@ -5371,7 +6202,7 @@ afr_handle_inodelk_contention(xlator_t *this, struct gf_upcall *upcall)
UNLOCK(&priv->lock);
if (inmem_count || onwire_count)
/* lock release will happen in txn code path after
- * inflight or on-wire txns are over.*/
+ * in-memory or on-wire txns are over.*/
return;
afr_ta_lock_release_synctask(this);
@@ -5470,14 +6301,15 @@ afr_notify(xlator_t *this, int32_t event, void *data, void *data2)
goto out;
}
- had_quorum = priv->quorum_count && afr_has_quorum(priv->child_up, this);
- if (priv->halo_enabled) {
- halo_max_latency_msec = afr_get_halo_latency(this);
+ had_quorum = priv->quorum_count &&
+ afr_has_quorum(priv->child_up, this, NULL);
+ if (event == GF_EVENT_CHILD_PING) {
+ child_latency_msec = (int64_t)(uintptr_t)data2;
+ if (priv->halo_enabled) {
+ halo_max_latency_msec = afr_get_halo_latency(this);
- if (event == GF_EVENT_CHILD_PING) {
/* Calculates the child latency and sets event
*/
- child_latency_msec = (int64_t)(uintptr_t)data2;
LOCK(&priv->lock);
{
__afr_handle_ping_event(this, child_xlator, idx,
@@ -5485,6 +6317,12 @@ afr_notify(xlator_t *this, int32_t event, void *data, void *data2)
child_latency_msec);
}
UNLOCK(&priv->lock);
+ } else {
+ LOCK(&priv->lock);
+ {
+ priv->child_latency[idx] = child_latency_msec;
+ }
+ UNLOCK(&priv->lock);
}
}
@@ -5528,22 +6366,27 @@ afr_notify(xlator_t *this, int32_t event, void *data, void *data2)
if (priv->thin_arbiter_count &&
(idx == AFR_CHILD_THIN_ARBITER)) {
priv->ta_child_up = 1;
+ priv->ta_event_gen++;
break;
}
__afr_handle_child_up_event(this, child_xlator, idx,
child_latency_msec, &event,
&call_psh, &up_child);
+ __afr_lock_heal_synctask(this, priv, idx);
break;
case GF_EVENT_CHILD_DOWN:
if (priv->thin_arbiter_count &&
(idx == AFR_CHILD_THIN_ARBITER)) {
priv->ta_child_up = 0;
+ priv->ta_event_gen++;
+ afr_ta_locked_priv_invalidate(priv);
break;
}
__afr_handle_child_down_event(this, child_xlator, idx,
child_latency_msec, &event,
&call_psh, &up_child);
+ __afr_mark_pending_lk_heal(this, priv, idx);
break;
case GF_EVENT_CHILD_CONNECTING:
@@ -5585,16 +6428,18 @@ afr_notify(xlator_t *this, int32_t event, void *data, void *data2)
UNLOCK(&priv->lock);
if (priv->quorum_count) {
- has_quorum = afr_has_quorum(priv->child_up, this);
+ has_quorum = afr_has_quorum(priv->child_up, this, NULL);
if (!had_quorum && has_quorum) {
gf_msg(this->name, GF_LOG_INFO, 0, AFR_MSG_QUORUM_MET,
"Client-quorum is met");
- gf_event(EVENT_AFR_QUORUM_MET, "subvol=%s", this->name);
+ gf_event(EVENT_AFR_QUORUM_MET, "client-pid=%d; subvol=%s",
+ this->ctx->cmd_args.client_pid, this->name);
}
if (had_quorum && !has_quorum) {
gf_msg(this->name, GF_LOG_WARNING, 0, AFR_MSG_QUORUM_FAIL,
"Client-quorum is not met");
- gf_event(EVENT_AFR_QUORUM_FAIL, "subvol=%s", this->name);
+ gf_event(EVENT_AFR_QUORUM_FAIL, "client-pid=%d; subvol=%s",
+ this->ctx->cmd_args.client_pid, this->name);
}
}
@@ -5613,10 +6458,8 @@ afr_notify(xlator_t *this, int32_t event, void *data, void *data2)
* b) Already heard from everyone, but we now got a child-up
* event.
*/
- if (have_heard_from_all && priv->shd.iamshd) {
- for (i = 0; i < priv->child_count; i++)
- if (priv->child_up[i])
- afr_selfheal_childup(this, i);
+ if (have_heard_from_all) {
+ afr_selfheal_childup(this, priv);
}
}
out:
@@ -5637,7 +6480,7 @@ afr_local_init(afr_local_t *local, afr_private_t *priv, int32_t *op_errno)
goto out;
}
- local->child_up = GF_CALLOC(priv->child_count, sizeof(*local->child_up),
+ local->child_up = GF_MALLOC(priv->child_count * sizeof(*local->child_up),
gf_afr_mt_char);
if (!local->child_up) {
if (op_errno)
@@ -5696,7 +6539,11 @@ afr_local_init(afr_local_t *local, afr_private_t *priv, int32_t *op_errno)
if (priv->thin_arbiter_count) {
local->ta_child_up = priv->ta_child_up;
local->ta_failed_subvol = AFR_CHILD_UNKNOWN;
+ local->read_txn_query_child = AFR_CHILD_UNKNOWN;
+ local->ta_event_gen = priv->ta_event_gen;
+ local->fop_state = TA_SUCCESS;
}
+ local->is_new_entry = _gf_false;
INIT_LIST_HEAD(&local->healer);
return 0;
@@ -5709,11 +6556,6 @@ afr_internal_lock_init(afr_internal_lock_t *lk, size_t child_count)
{
int ret = -ENOMEM;
- lk->locked_nodes = GF_CALLOC(sizeof(*lk->locked_nodes), child_count,
- gf_afr_mt_char);
- if (NULL == lk->locked_nodes)
- goto out;
-
lk->lower_locked_nodes = GF_CALLOC(sizeof(*lk->lower_locked_nodes),
child_count, gf_afr_mt_char);
if (NULL == lk->lower_locked_nodes)
@@ -5771,6 +6613,10 @@ afr_transaction_local_init(afr_local_t *local, xlator_t *this)
afr_private_t *priv = NULL;
priv = this->private;
+ INIT_LIST_HEAD(&local->transaction.wait_list);
+ INIT_LIST_HEAD(&local->transaction.owner_list);
+ INIT_LIST_HEAD(&local->ta_waitq);
+ INIT_LIST_HEAD(&local->ta_onwireq);
ret = afr_internal_lock_init(&local->internal_lock, priv->child_count);
if (ret < 0)
goto out;
@@ -5808,10 +6654,6 @@ afr_transaction_local_init(afr_local_t *local, xlator_t *this)
goto out;
ret = 0;
- INIT_LIST_HEAD(&local->transaction.wait_list);
- INIT_LIST_HEAD(&local->transaction.owner_list);
- INIT_LIST_HEAD(&local->ta_waitq);
- INIT_LIST_HEAD(&local->ta_onwireq);
out:
return ret;
}
@@ -5830,6 +6672,8 @@ afr_priv_destroy(afr_private_t *priv)
if (!priv)
goto out;
+
+ GF_FREE(priv->sh_domain);
GF_FREE(priv->last_event);
child_count = priv->child_count;
@@ -5845,7 +6689,9 @@ afr_priv_destroy(afr_private_t *priv)
GF_FREE(priv->local);
GF_FREE(priv->pending_key);
GF_FREE(priv->children);
+ GF_FREE(priv->anon_inode);
GF_FREE(priv->child_up);
+ GF_FREE(priv->halo_child_up);
GF_FREE(priv->child_latency);
LOCK_DESTROY(&priv->lock);
@@ -5896,263 +6742,218 @@ out:
return changelog;
}
-gf_boolean_t
-afr_decide_heal_info(afr_private_t *priv, unsigned char *sources, int source)
+static dict_t *
+afr_set_heal_info(char *status)
{
- int sources_count = 0;
+ dict_t *dict = NULL;
+ int ret = -1;
- if (source < 0)
+ dict = dict_new();
+ if (!dict) {
+ ret = -ENOMEM;
goto out;
+ }
- sources_count = AFR_COUNT(sources, priv->child_count);
- if (sources_count == priv->child_count)
- return _gf_false;
+ ret = dict_set_dynstr_sizen(dict, "heal-info", status);
+ if (ret)
+ gf_msg("", GF_LOG_WARNING, -ret, AFR_MSG_DICT_SET_FAILED,
+ "Failed to set heal-info key to "
+ "%s",
+ status);
out:
- return _gf_true;
+ /* Any error other than EINVAL, dict_set_dynstr frees status */
+ if (ret == -ENOMEM || ret == -EINVAL) {
+ GF_FREE(status);
+ }
+
+ if (ret && dict) {
+ dict_unref(dict);
+ dict = NULL;
+ }
+ return dict;
}
-int
-afr_selfheal_locked_metadata_inspect(call_frame_t *frame, xlator_t *this,
- inode_t *inode, gf_boolean_t *msh,
- unsigned char *pending)
+static gf_boolean_t
+afr_is_dirty_count_non_unary_for_txn(xlator_t *this, struct afr_reply *replies,
+ afr_transaction_type type)
{
- int ret = -1;
- unsigned char *locked_on = NULL;
- unsigned char *sources = NULL;
- unsigned char *sinks = NULL;
- unsigned char *healed_sinks = NULL;
- unsigned char *undid_pending = NULL;
- struct afr_reply *locked_replies = NULL;
-
afr_private_t *priv = this->private;
+ int *dirty = alloca0(priv->child_count * sizeof(int));
+ int i = 0;
- locked_on = alloca0(priv->child_count);
- sources = alloca0(priv->child_count);
- sinks = alloca0(priv->child_count);
- healed_sinks = alloca0(priv->child_count);
- undid_pending = alloca0(priv->child_count);
-
- locked_replies = alloca0(sizeof(*locked_replies) * priv->child_count);
-
- ret = afr_selfheal_inodelk(frame, this, inode, this->name, LLONG_MAX - 1, 0,
- locked_on);
- {
- if (ret == 0) {
- /* Not a single lock */
- ret = -afr_final_errno(frame->local, priv);
- if (ret == 0)
- ret = -ENOTCONN; /* all invalid responses */
- goto out;
- }
- ret = __afr_selfheal_metadata_prepare(
- frame, this, inode, locked_on, sources, sinks, healed_sinks,
- undid_pending, locked_replies, pending);
- *msh = afr_decide_heal_info(priv, sources, ret);
+ afr_selfheal_extract_xattr(this, replies, type, dirty, NULL);
+ for (i = 0; i < priv->child_count; i++) {
+ if (dirty[i] > 1)
+ return _gf_true;
}
- afr_selfheal_uninodelk(frame, this, inode, this->name, LLONG_MAX - 1, 0,
- locked_on);
-out:
- if (locked_replies)
- afr_replies_wipe(locked_replies, priv->child_count);
- return ret;
+
+ return _gf_false;
}
-int
-afr_selfheal_locked_data_inspect(call_frame_t *frame, xlator_t *this, fd_t *fd,
- gf_boolean_t *dsh, unsigned char *pflag)
+static gf_boolean_t
+afr_is_dirty_count_non_unary(xlator_t *this, struct afr_reply *replies,
+ ia_type_t ia_type)
{
- int ret = -1;
- unsigned char *data_lock = NULL;
- unsigned char *sources = NULL;
- unsigned char *sinks = NULL;
- unsigned char *healed_sinks = NULL;
- unsigned char *undid_pending = NULL;
- afr_private_t *priv = NULL;
- struct afr_reply *locked_replies = NULL;
- inode_t *inode = fd->inode;
+ gf_boolean_t data_chk = _gf_false;
+ gf_boolean_t mdata_chk = _gf_false;
+ gf_boolean_t entry_chk = _gf_false;
- priv = this->private;
- data_lock = alloca0(priv->child_count);
- sources = alloca0(priv->child_count);
- sinks = alloca0(priv->child_count);
- healed_sinks = alloca0(priv->child_count);
- undid_pending = alloca0(priv->child_count);
-
- locked_replies = alloca0(sizeof(*locked_replies) * priv->child_count);
+ switch (ia_type) {
+ case IA_IFDIR:
+ mdata_chk = _gf_true;
+ entry_chk = _gf_true;
+ break;
+ case IA_IFREG:
+ mdata_chk = _gf_true;
+ data_chk = _gf_true;
+ break;
+ default:
+ /*IA_IFBLK, IA_IFCHR, IA_IFLNK, IA_IFIFO, IA_IFSOCK*/
+ mdata_chk = _gf_true;
+ break;
+ }
- ret = afr_selfheal_inodelk(frame, this, inode, this->name, 0, 0, data_lock);
- {
- if (ret == 0) {
- ret = -afr_final_errno(frame->local, priv);
- if (ret == 0)
- ret = -ENOTCONN; /* all invalid responses */
- goto out;
- }
- ret = __afr_selfheal_data_prepare(frame, this, inode, data_lock,
- sources, sinks, healed_sinks,
- undid_pending, locked_replies, pflag);
- *dsh = afr_decide_heal_info(priv, sources, ret);
+ if (data_chk && afr_is_dirty_count_non_unary_for_txn(
+ this, replies, AFR_DATA_TRANSACTION)) {
+ return _gf_true;
+ } else if (mdata_chk && afr_is_dirty_count_non_unary_for_txn(
+ this, replies, AFR_METADATA_TRANSACTION)) {
+ return _gf_true;
+ } else if (entry_chk && afr_is_dirty_count_non_unary_for_txn(
+ this, replies, AFR_ENTRY_TRANSACTION)) {
+ return _gf_true;
}
- afr_selfheal_uninodelk(frame, this, inode, this->name, 0, 0, data_lock);
-out:
- if (locked_replies)
- afr_replies_wipe(locked_replies, priv->child_count);
- return ret;
+
+ return _gf_false;
}
-int
-afr_selfheal_locked_entry_inspect(call_frame_t *frame, xlator_t *this,
- inode_t *inode, gf_boolean_t *esh,
- unsigned char *pflag)
+static int
+afr_update_heal_status(xlator_t *this, struct afr_reply *replies,
+ ia_type_t ia_type, gf_boolean_t *esh, gf_boolean_t *dsh,
+ gf_boolean_t *msh, unsigned char pending)
{
int ret = -1;
- int source = -1;
+ GF_UNUSED int ret1 = 0;
+ int i = 0;
+ int io_domain_lk_count = 0;
+ int shd_domain_lk_count = 0;
afr_private_t *priv = NULL;
- unsigned char *locked_on = NULL;
- unsigned char *data_lock = NULL;
- unsigned char *sources = NULL;
- unsigned char *sinks = NULL;
- unsigned char *healed_sinks = NULL;
- struct afr_reply *locked_replies = NULL;
- gf_boolean_t granular_locks = _gf_false;
+ char *key1 = NULL;
+ char *key2 = NULL;
priv = this->private;
- if (strcmp("granular", priv->locking_scheme) == 0)
- granular_locks = _gf_true;
- locked_on = alloca0(priv->child_count);
- data_lock = alloca0(priv->child_count);
- sources = alloca0(priv->child_count);
- sinks = alloca0(priv->child_count);
- healed_sinks = alloca0(priv->child_count);
+ key1 = alloca0(strlen(GLUSTERFS_INODELK_DOM_PREFIX) + 2 +
+ strlen(this->name));
+ key2 = alloca0(strlen(GLUSTERFS_INODELK_DOM_PREFIX) + 2 +
+ strlen(priv->sh_domain));
+ sprintf(key1, "%s:%s", GLUSTERFS_INODELK_DOM_PREFIX, this->name);
+ sprintf(key2, "%s:%s", GLUSTERFS_INODELK_DOM_PREFIX, priv->sh_domain);
- locked_replies = alloca0(sizeof(*locked_replies) * priv->child_count);
-
- if (!granular_locks) {
- ret = afr_selfheal_tryentrylk(frame, this, inode, priv->sh_domain, NULL,
- locked_on);
- }
- {
- if (!granular_locks && ret == 0) {
- ret = -afr_final_errno(frame->local, priv);
- if (ret == 0)
- ret = -ENOTCONN; /* all invalid responses */
- goto out;
+ for (i = 0; i < priv->child_count; i++) {
+ if ((replies[i].valid != 1) || (replies[i].op_ret != 0))
+ continue;
+ if (!io_domain_lk_count) {
+ ret1 = dict_get_int32(replies[i].xdata, key1, &io_domain_lk_count);
+ }
+ if (!shd_domain_lk_count) {
+ ret1 = dict_get_int32(replies[i].xdata, key2, &shd_domain_lk_count);
}
+ }
- ret = afr_selfheal_entrylk(frame, this, inode, this->name, NULL,
- data_lock);
- {
- if (ret == 0) {
- ret = -afr_final_errno(frame->local, priv);
- if (ret == 0)
- ret = -ENOTCONN;
- /* all invalid responses */
- goto unlock;
- }
- ret = __afr_selfheal_entry_prepare(frame, this, inode, data_lock,
- sources, sinks, healed_sinks,
- locked_replies, &source, pflag);
- if ((ret == 0) && (*pflag & PFLAG_SBRAIN))
- ret = -EIO;
- *esh = afr_decide_heal_info(priv, sources, ret);
+ if (!pending) {
+ if ((afr_is_dirty_count_non_unary(this, replies, ia_type)) ||
+ (!io_domain_lk_count)) {
+ /* Needs heal. */
+ ret = 0;
+ } else {
+ /* No heal needed. */
+ *dsh = *esh = *msh = 0;
+ }
+ } else {
+ if (shd_domain_lk_count) {
+ ret = -EAGAIN; /*For 'possibly-healing'. */
+ } else {
+ ret = 0; /*needs heal. Just set a non -ve value so that it is
+ assumed as the source index.*/
}
- afr_selfheal_unentrylk(frame, this, inode, this->name, NULL, data_lock,
- NULL);
}
-unlock:
- if (!granular_locks)
- afr_selfheal_unentrylk(frame, this, inode, priv->sh_domain, NULL,
- locked_on, NULL);
-out:
- if (locked_replies)
- afr_replies_wipe(locked_replies, priv->child_count);
return ret;
}
+/*return EIO, EAGAIN or pending*/
int
-afr_selfheal_locked_inspect(call_frame_t *frame, xlator_t *this, uuid_t gfid,
- inode_t **inode, gf_boolean_t *entry_selfheal,
- gf_boolean_t *data_selfheal,
- gf_boolean_t *metadata_selfheal,
- unsigned char *pending)
-
+afr_lockless_inspect(call_frame_t *frame, xlator_t *this, uuid_t gfid,
+ inode_t **inode, gf_boolean_t *entry_selfheal,
+ gf_boolean_t *data_selfheal,
+ gf_boolean_t *metadata_selfheal, unsigned char *pending)
{
int ret = -1;
- fd_t *fd = NULL;
+ int i = 0;
+ afr_private_t *priv = NULL;
+ struct afr_reply *replies = NULL;
gf_boolean_t dsh = _gf_false;
gf_boolean_t msh = _gf_false;
gf_boolean_t esh = _gf_false;
+ unsigned char *sources = NULL;
+ unsigned char *sinks = NULL;
+ unsigned char *valid_on = NULL;
+ uint64_t *witness = NULL;
+
+ priv = this->private;
+ replies = alloca0(sizeof(*replies) * priv->child_count);
+ sources = alloca0(sizeof(*sources) * priv->child_count);
+ sinks = alloca0(sizeof(*sinks) * priv->child_count);
+ witness = alloca0(sizeof(*witness) * priv->child_count);
+ valid_on = alloca0(sizeof(*valid_on) * priv->child_count);
ret = afr_selfheal_unlocked_inspect(frame, this, gfid, inode, &dsh, &msh,
- &esh);
+ &esh, replies);
if (ret)
goto out;
-
- /* For every heal type hold locks and check if it indeed needs heal */
-
- /* Heal-info does an open() on the file being examined so that the
- * current eager-lock holding client, if present, at some point sees
- * open-fd count being > 1 and releases the eager-lock so that heal-info
- * doesn't remain blocked forever until IO completes.
- */
- if ((*inode)->ia_type == IA_IFREG) {
- ret = afr_selfheal_data_open(this, *inode, &fd);
- if (ret < 0) {
- gf_msg_debug(this->name, -ret, "%s: Failed to open",
- uuid_utoa((*inode)->gfid));
- goto out;
+ for (i = 0; i < priv->child_count; i++) {
+ if (replies[i].valid && replies[i].op_ret == 0) {
+ valid_on[i] = 1;
}
}
-
if (msh) {
- ret = afr_selfheal_locked_metadata_inspect(frame, this, *inode, &msh,
- pending);
- if (ret == -EIO)
+ ret = afr_selfheal_find_direction(frame, this, replies,
+ AFR_METADATA_TRANSACTION, valid_on,
+ sources, sinks, witness, pending);
+ if (*pending & PFLAG_SBRAIN)
+ ret = -EIO;
+ if (ret)
goto out;
}
-
if (dsh) {
- ret = afr_selfheal_locked_data_inspect(frame, this, fd, &dsh, pending);
- if (ret == -EIO || (ret == -EAGAIN))
+ ret = afr_selfheal_find_direction(frame, this, replies,
+ AFR_DATA_TRANSACTION, valid_on,
+ sources, sinks, witness, pending);
+ if (*pending & PFLAG_SBRAIN)
+ ret = -EIO;
+ if (ret)
goto out;
}
-
if (esh) {
- ret = afr_selfheal_locked_entry_inspect(frame, this, *inode, &esh,
- pending);
+ ret = afr_selfheal_find_direction(frame, this, replies,
+ AFR_ENTRY_TRANSACTION, valid_on,
+ sources, sinks, witness, pending);
+ if (*pending & PFLAG_SBRAIN)
+ ret = -EIO;
+ if (ret)
+ goto out;
}
+ ret = afr_update_heal_status(this, replies, (*inode)->ia_type, &esh, &dsh,
+ &msh, *pending);
out:
*data_selfheal = dsh;
*entry_selfheal = esh;
*metadata_selfheal = msh;
- if (fd)
- fd_unref(fd);
+ if (replies)
+ afr_replies_wipe(replies, priv->child_count);
return ret;
}
-dict_t *
-afr_set_heal_info(char *status)
-{
- dict_t *dict = NULL;
- int ret = -1;
-
- dict = dict_new();
- if (!dict) {
- ret = -ENOMEM;
- goto out;
- }
-
- ret = dict_set_str(dict, "heal-info", status);
- if (ret)
- gf_msg("", GF_LOG_WARNING, -ret, AFR_MSG_DICT_SET_FAILED,
- "Failed to set heal-info key to "
- "%s",
- status);
-out:
- return dict;
-}
-
int
afr_get_heal_info(call_frame_t *frame, xlator_t *this, loc_t *loc)
{
@@ -6162,17 +6963,27 @@ afr_get_heal_info(call_frame_t *frame, xlator_t *this, loc_t *loc)
unsigned char pending = 0;
dict_t *dict = NULL;
int ret = -1;
- int op_errno = 0;
+ int op_errno = ENOMEM;
inode_t *inode = NULL;
char *substr = NULL;
char *status = NULL;
+ call_frame_t *heal_frame = NULL;
+ afr_local_t *heal_local = NULL;
+
+ /*Use frame with lk-owner set*/
+ heal_frame = afr_frame_create(frame->this, &op_errno);
+ if (!heal_frame) {
+ ret = -1;
+ goto out;
+ }
+ heal_local = heal_frame->local;
+ heal_frame->local = frame->local;
- ret = afr_selfheal_locked_inspect(frame, this, loc->gfid, &inode,
- &entry_selfheal, &data_selfheal,
- &metadata_selfheal, &pending);
+ ret = afr_lockless_inspect(heal_frame, this, loc->gfid, &inode,
+ &entry_selfheal, &data_selfheal,
+ &metadata_selfheal, &pending);
if (ret == -ENOMEM) {
- op_errno = -ret;
ret = -1;
goto out;
}
@@ -6185,26 +6996,50 @@ afr_get_heal_info(call_frame_t *frame, xlator_t *this, loc_t *loc)
if (ret == -EIO) {
ret = gf_asprintf(&status, "split-brain%s", substr ? substr : "");
- if (ret < 0)
+ if (ret < 0) {
goto out;
+ }
dict = afr_set_heal_info(status);
+ if (!dict) {
+ ret = -1;
+ goto out;
+ }
} else if (ret == -EAGAIN) {
ret = gf_asprintf(&status, "possibly-healing%s", substr ? substr : "");
- if (ret < 0)
+ if (ret < 0) {
goto out;
+ }
dict = afr_set_heal_info(status);
+ if (!dict) {
+ ret = -1;
+ goto out;
+ }
} else if (ret >= 0) {
/* value of ret = source index
* so ret >= 0 and at least one of the 3 booleans set to
* true means a source is identified; heal is required.
*/
if (!data_selfheal && !entry_selfheal && !metadata_selfheal) {
- dict = afr_set_heal_info("no-heal");
+ status = gf_strdup("no-heal");
+ if (!status) {
+ ret = -1;
+ goto out;
+ }
+ dict = afr_set_heal_info(status);
+ if (!dict) {
+ ret = -1;
+ goto out;
+ }
} else {
ret = gf_asprintf(&status, "heal%s", substr ? substr : "");
- if (ret < 0)
+ if (ret < 0) {
goto out;
+ }
dict = afr_set_heal_info(status);
+ if (!dict) {
+ ret = -1;
+ goto out;
+ }
}
} else if (ret < 0) {
/* Apart from above checked -ve ret values, there are
@@ -6216,14 +7051,25 @@ afr_get_heal_info(call_frame_t *frame, xlator_t *this, loc_t *loc)
*/
if (data_selfheal || entry_selfheal || metadata_selfheal) {
ret = gf_asprintf(&status, "heal%s", substr ? substr : "");
- if (ret < 0)
+ if (ret < 0) {
goto out;
+ }
dict = afr_set_heal_info(status);
+ if (!dict) {
+ ret = -1;
+ goto out;
+ }
}
}
+
ret = 0;
+ op_errno = 0;
out:
+ if (heal_frame) {
+ heal_frame->local = heal_local;
+ AFR_STACK_DESTROY(heal_frame);
+ }
AFR_STACK_UNWIND(getxattr, frame, ret, op_errno, dict, NULL);
if (dict)
dict_unref(dict);
@@ -6345,10 +7191,10 @@ afr_get_split_brain_status(void *opaque)
}
/* Calculation for string length :
- * (child_count X length of child-name) + SLEN (" Choices :")
+ * (child_count X length of child-name) + SLEN(" Choices :")
* child-name consists of :
* a) 251 = max characters for volname according to GD_VOLUME_NAME_MAX
- * b) strlen ("-client-00,") assuming 16 replicas
+ * b) strlen("-client-00,") assuming 16 replicas
*/
choices = alloca0(priv->child_count * (256 + SLEN("-client-00,")) +
SLEN(" Choices:"));
@@ -6357,8 +7203,8 @@ afr_get_split_brain_status(void *opaque)
if (ret) {
op_errno = -ret;
if (ret == -EAGAIN) {
- ret = dict_set_str(dict, GF_AFR_SBRAIN_STATUS,
- SBRAIN_HEAL_NO_GO_MSG);
+ ret = dict_set_sizen_str_sizen(dict, GF_AFR_SBRAIN_STATUS,
+ SBRAIN_HEAL_NO_GO_MSG);
if (ret) {
gf_msg(this->name, GF_LOG_WARNING, -ret,
AFR_MSG_DICT_SET_FAILED,
@@ -6387,16 +7233,15 @@ afr_get_split_brain_status(void *opaque)
op_errno = ENOMEM;
goto out;
}
- ret = dict_set_dynstr(dict, GF_AFR_SBRAIN_STATUS, status);
+ ret = dict_set_dynstr_sizen(dict, GF_AFR_SBRAIN_STATUS, status);
if (ret) {
op_errno = -ret;
ret = -1;
goto out;
}
} else {
- ret = dict_set_str(dict, GF_AFR_SBRAIN_STATUS,
- "The file is not under data or"
- " metadata split-brain");
+ ret = dict_set_sizen_str_sizen(dict, GF_AFR_SBRAIN_STATUS,
+ SFILE_NOT_UNDER_DATA);
if (ret) {
op_errno = -ret;
ret = -1;
@@ -6421,6 +7266,8 @@ afr_heal_splitbrain_file(call_frame_t *frame, xlator_t *this, loc_t *loc)
int op_errno = 0;
dict_t *dict = NULL;
afr_local_t *local = NULL;
+ afr_local_t *heal_local = NULL;
+ call_frame_t *heal_frame = NULL;
local = frame->local;
dict = dict_new();
@@ -6430,10 +7277,20 @@ afr_heal_splitbrain_file(call_frame_t *frame, xlator_t *this, loc_t *loc)
goto out;
}
- ret = afr_selfheal_do(frame, this, loc->gfid);
+ heal_frame = afr_frame_create(this, &op_errno);
+ if (!heal_frame) {
+ ret = -1;
+ goto out;
+ }
+ heal_local = heal_frame->local;
+ heal_frame->local = frame->local;
+ /*Initiate heal with heal_frame with lk-owner set so that inodelk/entrylk
+ * work correctly*/
+ ret = afr_selfheal_do(heal_frame, this, loc->gfid);
if (ret == 1 || ret == 2) {
- ret = dict_set_str(dict, "sh-fail-msg", "File not in split-brain");
+ ret = dict_set_sizen_str_sizen(dict, "sh-fail-msg",
+ SFILE_NOT_IN_SPLIT_BRAIN);
if (ret)
gf_msg(this->name, GF_LOG_WARNING, -ret, AFR_MSG_DICT_SET_FAILED,
"Failed to set sh-fail-msg in dict");
@@ -6451,6 +7308,10 @@ afr_heal_splitbrain_file(call_frame_t *frame, xlator_t *this, loc_t *loc)
}
out:
+ if (heal_frame) {
+ heal_frame->local = heal_local;
+ AFR_STACK_DESTROY(heal_frame);
+ }
if (local->op == GF_FOP_GETXATTR)
AFR_STACK_UNWIND(getxattr, frame, ret, op_errno, dict, NULL);
else if (local->op == GF_FOP_SETXATTR)
@@ -6593,7 +7454,7 @@ afr_fav_child_reset_sink_xattrs(void *opaque)
ret = afr_selfheal_inodelk(heal_frame, this, inode, this->name, 0, 0,
locked_on);
{
- if (ret < AFR_SH_MIN_PARTICIPANTS)
+ if (ret < priv->child_count)
goto data_unlock;
ret = __afr_selfheal_data_prepare(
heal_frame, this, inode, locked_on, sources, sinks,
@@ -6610,7 +7471,7 @@ afr_fav_child_reset_sink_xattrs(void *opaque)
ret = afr_selfheal_inodelk(heal_frame, this, inode, this->name,
LLONG_MAX - 1, 0, locked_on);
{
- if (ret < AFR_SH_MIN_PARTICIPANTS)
+ if (ret < priv->child_count)
goto mdata_unlock;
ret = __afr_selfheal_metadata_prepare(
heal_frame, this, inode, locked_on, sources, sinks,
@@ -6637,12 +7498,14 @@ afr_serialize_xattrs_with_delimiter(call_frame_t *frame, xlator_t *this,
char *xattr = NULL;
int i = 0;
int len = 0;
+ int keylen = 0;
size_t str_len = 0;
int ret = -1;
priv = this->private;
local = frame->local;
+ keylen = strlen(local->cont.getxattr.name);
for (i = 0; i < priv->child_count; i++) {
if (!local->replies[i].valid || local->replies[i].op_ret) {
str_len = strlen(default_str);
@@ -6651,8 +7514,8 @@ afr_serialize_xattrs_with_delimiter(call_frame_t *frame, xlator_t *this,
buf[len++] = delimiter;
buf[len] = '\0';
} else {
- ret = dict_get_str(local->replies[i].xattr,
- local->cont.getxattr.name, &xattr);
+ ret = dict_get_strn(local->replies[i].xattr,
+ local->cont.getxattr.name, keylen, &xattr);
if (ret) {
gf_msg("TEST", GF_LOG_ERROR, -ret, AFR_MSG_DICT_GET_FAILED,
"Failed to get the node_uuid of brick "
@@ -6939,3 +7802,77 @@ afr_ta_has_quorum(afr_private_t *priv, afr_local_t *local)
return _gf_false;
}
+/*
+ * Tell whether the fop carried by @frame is a LOOKUP on the root inode
+ * coming from a client whose pid is GF_CLIENT_PID_ADD_REPLICA_MOUNT.
+ *
+ * Returns _gf_true only when all of the following hold:
+ *   - frame->root->pid == GF_CLIENT_PID_ADD_REPLICA_MOUNT
+ *   - local->op == GF_FOP_LOOKUP
+ *   - local->inode is non-NULL and its gfid passes __is_root_gfid()
+ * Returns _gf_false in every other case.
+ *
+ * NOTE: frame->local is dereferenced without a NULL check once the pid
+ * test has passed, so the caller must have initialised it by then.
+ */
+static gf_boolean_t
+afr_is_add_replica_mount_lookup_on_root(call_frame_t *frame)
+{
+ afr_local_t *local = NULL;
+
+ if (frame->root->pid != GF_CLIENT_PID_ADD_REPLICA_MOUNT)
+ return _gf_false;
+
+ local = frame->local;
+
+ if (local->op != GF_FOP_LOOKUP)
+ /* TODO: If the replica count is being increased on a plain distribute
+ * volume that was never mounted, we need to allow setxattr on '/' with
+ * GF_CLIENT_PID_NO_ROOT_SQUASH to accommodate DHT layout setting */
+ return _gf_false;
+
+ if (local->inode == NULL)
+ return _gf_false;
+
+ if (!__is_root_gfid(local->inode->gfid))
+ return _gf_false;
+
+ return _gf_true;
+}
+/*
+ * Special-case quorum check for lookups.
+ *
+ * Returns _gf_true when @frame is non-NULL, @up_children_count is greater
+ * than zero, and afr_is_add_replica_mount_lookup_on_root(frame) holds;
+ * returns _gf_false otherwise.
+ *
+ * Despite its name, this function performs no quorum arithmetic of its
+ * own: it only recognises the one case above.
+ */
+gf_boolean_t
+afr_lookup_has_quorum(call_frame_t *frame, const unsigned int up_children_count)
+{
+ if (frame && (up_children_count > 0) &&
+ afr_is_add_replica_mount_lookup_on_root(frame))
+ return _gf_true;
+
+ return _gf_false;
+}
+/*
+ * Turn the result stored in frame->local into a failure when the replies
+ * do not satisfy quorum.
+ *
+ * A per-child array of priv->child_count bytes is filled in by
+ * afr_fill_success_replies() (presumably flagging the children whose
+ * reply succeeded - confirm against that helper). If priv->quorum_count
+ * is non-zero and afr_has_quorum() rejects that array, local->op_ret is
+ * set to -1 and local->op_errno is taken from afr_final_errno(), falling
+ * back to afr_quorum_errno() when the former yields 0. In all other
+ * cases local is left unmodified.
+ *
+ * The scratch array comes from alloca0(), so there is nothing to free
+ * here (assumes alloca0 is a zeroing stack allocation - TODO confirm).
+ */
+void
+afr_handle_replies_quorum(call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = frame->local;
+ afr_private_t *priv = this->private;
+ unsigned char *success_replies = NULL;
+
+ success_replies = alloca0(priv->child_count);
+ afr_fill_success_replies(local, priv, success_replies);
+
+ if (priv->quorum_count && !afr_has_quorum(success_replies, this, NULL)) {
+ local->op_errno = afr_final_errno(local, priv);
+ if (!local->op_errno)
+ local->op_errno = afr_quorum_errno(priv);
+ local->op_ret = -1;
+ }
+}
+/*
+ * Check whether @dict carries a non-zero pending value for @child.
+ *
+ * Looks up the key priv->pending_key[child] with dict_get_ptr(). When the
+ * lookup succeeds (returns 0), the value is read as an array of
+ * AFR_NUM_CHANGE_LOGS ints and _gf_true is returned as soon as any of
+ * them is non-zero. Returns _gf_false when the key cannot be fetched or
+ * when every inspected element is zero.
+ *
+ * @child is used as an index into priv->pending_key without a range
+ * check.
+ */
+gf_boolean_t
+afr_ta_dict_contains_pending_xattr(dict_t *dict, afr_private_t *priv, int child)
+{
+ int *pending = NULL;
+ int ret = 0;
+ int i = 0;
+
+ ret = dict_get_ptr(dict, priv->pending_key[child], (void *)&pending);
+ if (ret == 0) {
+ for (i = 0; i < AFR_NUM_CHANGE_LOGS; i++) {
+ /* Not doing a ntoh32(pending) as we just want to check
+ * if it is non-zero or not. */
+ if (pending[i]) {
+ return _gf_true;
+ }
+ }
+ }
+
+ return _gf_false;
+}
diff --git a/xlators/cluster/afr/src/afr-dir-read.c b/xlators/cluster/afr/src/afr-dir-read.c
index 4c40e85f393..f8bf8340dab 100644
--- a/xlators/cluster/afr/src/afr-dir-read.c
+++ b/xlators/cluster/afr/src/afr-dir-read.c
@@ -10,24 +10,17 @@
#include <libgen.h>
#include <unistd.h>
-#include <fnmatch.h>
#include <sys/time.h>
#include <stdlib.h>
#include <signal.h>
#include <string.h>
-#include "glusterfs.h"
-#include "dict.h"
-#include "xlator.h"
-#include "hashfn.h"
-#include "logging.h"
-#include "list.h"
-#include "call-stub.h"
-#include "defaults.h"
-#include "common-utils.h"
-#include "compat-errno.h"
-#include "compat.h"
-#include "checksum.h"
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/dict.h>
+#include <glusterfs/list.h>
+#include <glusterfs/common-utils.h>
+#include <glusterfs/compat-errno.h>
+#include <glusterfs/compat.h>
#include "afr.h"
#include "afr-transaction.h"
@@ -45,6 +38,10 @@ afr_opendir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
fd_ctx = local->fd_ctx;
child_index = (long)cookie;
+ local->replies[child_index].valid = 1;
+ local->replies[child_index].op_ret = op_ret;
+ local->replies[child_index].op_errno = op_errno;
+
LOCK(&frame->lock);
{
if (op_ret == -1) {
@@ -56,19 +53,22 @@ afr_opendir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
if (!local->xdata_rsp && xdata)
local->xdata_rsp = dict_ref(xdata);
}
+ call_count = --local->call_count;
}
UNLOCK(&frame->lock);
- call_count = afr_frame_return(frame);
-
- if (call_count == 0)
+ if (call_count == 0) {
+ afr_handle_replies_quorum(frame, this);
AFR_STACK_UNWIND(opendir, frame, local->op_ret, local->op_errno,
local->fd, NULL);
+ }
+
return 0;
}
int
-afr_opendir(call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd)
+afr_opendir(call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd,
+ dict_t *xdata)
{
afr_private_t *priv = NULL;
afr_local_t *local = NULL;
@@ -84,6 +84,12 @@ afr_opendir(call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd)
goto out;
local->op = GF_FOP_OPENDIR;
+
+ if (priv->quorum_count && !afr_has_quorum(local->child_up, this, NULL)) {
+ op_errno = afr_quorum_errno(priv);
+ goto out;
+ }
+
if (!afr_is_consistent_io_possible(local, priv, &op_errno))
goto out;
@@ -158,8 +164,8 @@ afr_validate_read_subvol(inode_t *inode, xlator_t *this, int par_read_subvol)
}
static void
-afr_readdir_transform_entries(gf_dirent_t *subvol_entries, int subvol,
- gf_dirent_t *entries, fd_t *fd)
+afr_readdir_transform_entries(call_frame_t *frame, gf_dirent_t *subvol_entries,
+ int subvol, gf_dirent_t *entries, fd_t *fd)
{
int ret = -1;
gf_dirent_t *entry = NULL;
@@ -177,8 +183,8 @@ afr_readdir_transform_entries(gf_dirent_t *subvol_entries, int subvol,
list_for_each_entry_safe(entry, tmp, &subvol_entries->list, list)
{
- if (__is_root_gfid(fd->inode->gfid) &&
- !strcmp(entry->d_name, GF_REPLICATE_TRASH_DIR)) {
+ if (afr_is_private_directory(priv, fd->inode->gfid, entry->d_name,
+ frame->root->pid)) {
continue;
}
@@ -222,8 +228,8 @@ afr_readdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
}
if (op_ret >= 0)
- afr_readdir_transform_entries(subvol_entries, (long)cookie, &entries,
- local->fd);
+ afr_readdir_transform_entries(frame, subvol_entries, (long)cookie,
+ &entries, local->fd);
AFR_STACK_UNWIND(readdir, frame, op_ret, op_errno, &entries, xdata);
diff --git a/xlators/cluster/afr/src/afr-dir-write.c b/xlators/cluster/afr/src/afr-dir-write.c
index 5725b1c5cb3..b7cceb79158 100644
--- a/xlators/cluster/afr/src/afr-dir-write.c
+++ b/xlators/cluster/afr/src/afr-dir-write.c
@@ -10,24 +10,20 @@
#include <libgen.h>
#include <unistd.h>
-#include <fnmatch.h>
#include <sys/time.h>
#include <stdlib.h>
#include <signal.h>
-#include "glusterfs.h"
+#include <glusterfs/glusterfs.h>
#include "afr.h"
-#include "dict.h"
-#include "xlator.h"
-#include "hashfn.h"
-#include "logging.h"
-#include "list.h"
-#include "call-stub.h"
-#include "defaults.h"
-#include "common-utils.h"
-#include "compat-errno.h"
-#include "compat.h"
-#include "byte-order.h"
+#include <glusterfs/dict.h>
+#include <glusterfs/logging.h>
+#include <glusterfs/list.h>
+#include <glusterfs/defaults.h>
+#include <glusterfs/common-utils.h>
+#include <glusterfs/compat-errno.h>
+#include <glusterfs/compat.h>
+#include <glusterfs/byte-order.h>
#include "afr.h"
#include "afr-transaction.h"
@@ -98,7 +94,9 @@ __afr_dir_write_finalize(call_frame_t *frame, xlator_t *this)
}
if (local->inode) {
- afr_replies_interpret(frame, this, local->inode, NULL);
+ if (local->op != GF_FOP_RENAME && local->op != GF_FOP_LINK)
+ afr_replies_interpret(frame, this, local->inode, NULL);
+
inode_read_subvol = afr_data_subvol_get(local->inode, this, NULL, NULL,
NULL, &args);
}
@@ -121,11 +119,11 @@ __afr_dir_write_finalize(call_frame_t *frame, xlator_t *this)
continue;
if (local->replies[i].op_ret < 0) {
if (local->inode)
- afr_inode_event_gen_reset(local->inode, this);
+ afr_inode_need_refresh_set(local->inode, this);
if (local->parent)
- afr_inode_event_gen_reset(local->parent, this);
+ afr_inode_need_refresh_set(local->parent, this);
if (local->parent2)
- afr_inode_event_gen_reset(local->parent2, this);
+ afr_inode_need_refresh_set(local->parent2, this);
continue;
}
@@ -231,9 +229,9 @@ __afr_dir_write_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
__afr_dir_write_fill(frame, this, child_index, op_ret, op_errno, buf,
preparent, postparent, preparent2, postparent2,
xdata);
+ call_count = --local->call_count;
}
UNLOCK(&frame->lock);
- call_count = afr_frame_return(frame);
if (call_count == 0) {
__afr_dir_write_finalize(frame, this);
@@ -347,6 +345,7 @@ afr_mark_entry_pending_changelog(call_frame_t *frame, xlator_t *this)
afr_private_t *priv = NULL;
int pre_op_count = 0;
int failed_count = 0;
+ unsigned char *success_replies = NULL;
local = frame->local;
priv = this->private;
@@ -362,9 +361,22 @@ afr_mark_entry_pending_changelog(call_frame_t *frame, xlator_t *this)
failed_count = AFR_COUNT(local->transaction.failed_subvols,
priv->child_count);
+ /* FOP succeeded on all bricks. */
if (pre_op_count == priv->child_count && !failed_count)
return;
+ /* FOP did not succeed on quorum no. of bricks. */
+ success_replies = alloca0(priv->child_count);
+ afr_fill_success_replies(local, priv, success_replies);
+ if (!afr_has_quorum(success_replies, this, NULL))
+ return;
+
+ if (priv->thin_arbiter_count) {
+ /*Mark new entry using ta file*/
+ local->is_new_entry = _gf_true;
+ return;
+ }
+
afr_mark_new_entry_changelog(frame, this);
return;
@@ -423,15 +435,11 @@ int
afr_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata)
{
- afr_private_t *priv = NULL;
afr_local_t *local = NULL;
- afr_internal_lock_t *int_lock = NULL;
call_frame_t *transaction_frame = NULL;
int ret = -1;
int op_errno = ENOMEM;
- priv = this->private;
-
transaction_frame = copy_frame(frame);
if (!transaction_frame)
goto out;
@@ -473,16 +481,6 @@ afr_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
local->transaction.main_frame = frame;
local->transaction.basename = AFR_BASENAME(loc->path);
- int_lock = &local->internal_lock;
-
- int_lock->lockee_count = 0;
- ret = afr_init_entry_lockee(&int_lock->lockee[0], local,
- &local->transaction.parent_loc,
- local->transaction.basename, priv->child_count);
- if (ret)
- goto out;
-
- int_lock->lockee_count++;
ret = afr_transaction(transaction_frame, this, AFR_ENTRY_TRANSACTION);
if (ret < 0) {
op_errno = -ret;
@@ -553,15 +551,11 @@ int
afr_mknod(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
dev_t dev, mode_t umask, dict_t *xdata)
{
- afr_private_t *priv = NULL;
afr_local_t *local = NULL;
- afr_internal_lock_t *int_lock = NULL;
call_frame_t *transaction_frame = NULL;
int ret = -1;
int op_errno = ENOMEM;
- priv = this->private;
-
transaction_frame = copy_frame(frame);
if (!transaction_frame)
goto out;
@@ -596,16 +590,6 @@ afr_mknod(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
local->transaction.main_frame = frame;
local->transaction.basename = AFR_BASENAME(loc->path);
- int_lock = &local->internal_lock;
-
- int_lock->lockee_count = 0;
- ret = afr_init_entry_lockee(&int_lock->lockee[0], local,
- &local->transaction.parent_loc,
- local->transaction.basename, priv->child_count);
- if (ret)
- goto out;
-
- int_lock->lockee_count++;
ret = afr_transaction(transaction_frame, this, AFR_ENTRY_TRANSACTION);
if (ret < 0) {
op_errno = -ret;
@@ -674,15 +658,11 @@ int
afr_mkdir(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
mode_t umask, dict_t *xdata)
{
- afr_private_t *priv = NULL;
afr_local_t *local = NULL;
- afr_internal_lock_t *int_lock = NULL;
call_frame_t *transaction_frame = NULL;
int ret = -1;
int op_errno = ENOMEM;
- priv = this->private;
-
transaction_frame = copy_frame(frame);
if (!transaction_frame)
goto out;
@@ -698,7 +678,7 @@ afr_mkdir(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
local->cont.mkdir.mode = mode;
local->umask = umask;
- if (!xdata || !dict_get(xdata, "gfid-req")) {
+ if (!xdata || !dict_get_sizen(xdata, "gfid-req")) {
op_errno = EPERM;
gf_msg_callingfn(this->name, GF_LOG_WARNING, op_errno,
AFR_MSG_GFID_NULL,
@@ -724,16 +704,6 @@ afr_mkdir(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
local->transaction.main_frame = frame;
local->transaction.basename = AFR_BASENAME(loc->path);
- int_lock = &local->internal_lock;
-
- int_lock->lockee_count = 0;
- ret = afr_init_entry_lockee(&int_lock->lockee[0], local,
- &local->transaction.parent_loc,
- local->transaction.basename, priv->child_count);
- if (ret)
- goto out;
-
- int_lock->lockee_count++;
ret = afr_transaction(transaction_frame, this, AFR_ENTRY_TRANSACTION);
if (ret < 0) {
op_errno = -ret;
@@ -802,15 +772,11 @@ int
afr_link(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
dict_t *xdata)
{
- afr_private_t *priv = NULL;
afr_local_t *local = NULL;
- afr_internal_lock_t *int_lock = NULL;
call_frame_t *transaction_frame = NULL;
int ret = -1;
int op_errno = ENOMEM;
- priv = this->private;
-
transaction_frame = copy_frame(frame);
if (!transaction_frame)
goto out;
@@ -845,16 +811,6 @@ afr_link(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
local->transaction.main_frame = frame;
local->transaction.basename = AFR_BASENAME(newloc->path);
- int_lock = &local->internal_lock;
-
- int_lock->lockee_count = 0;
- ret = afr_init_entry_lockee(&int_lock->lockee[0], local,
- &local->transaction.parent_loc,
- local->transaction.basename, priv->child_count);
- if (ret)
- goto out;
-
- int_lock->lockee_count++;
ret = afr_transaction(transaction_frame, this, AFR_ENTRY_TRANSACTION);
if (ret < 0) {
op_errno = -ret;
@@ -924,15 +880,11 @@ int
afr_symlink(call_frame_t *frame, xlator_t *this, const char *linkpath,
loc_t *loc, mode_t umask, dict_t *xdata)
{
- afr_private_t *priv = NULL;
afr_local_t *local = NULL;
- afr_internal_lock_t *int_lock = NULL;
call_frame_t *transaction_frame = NULL;
int ret = -1;
int op_errno = ENOMEM;
- priv = this->private;
-
transaction_frame = copy_frame(frame);
if (!transaction_frame)
goto out;
@@ -966,16 +918,6 @@ afr_symlink(call_frame_t *frame, xlator_t *this, const char *linkpath,
local->transaction.main_frame = frame;
local->transaction.basename = AFR_BASENAME(loc->path);
- int_lock = &local->internal_lock;
-
- int_lock->lockee_count = 0;
- ret = afr_init_entry_lockee(&int_lock->lockee[0], local,
- &local->transaction.parent_loc,
- local->transaction.basename, priv->child_count);
- if (ret)
- goto out;
-
- int_lock->lockee_count++;
ret = afr_transaction(transaction_frame, this, AFR_ENTRY_TRANSACTION);
if (ret < 0) {
op_errno = -ret;
@@ -1048,15 +990,10 @@ int
afr_rename(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
dict_t *xdata)
{
- afr_private_t *priv = NULL;
afr_local_t *local = NULL;
- afr_internal_lock_t *int_lock = NULL;
call_frame_t *transaction_frame = NULL;
int ret = -1;
int op_errno = ENOMEM;
- int nlockee = 0;
-
- priv = this->private;
transaction_frame = copy_frame(frame);
if (!transaction_frame) {
@@ -1099,35 +1036,6 @@ afr_rename(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
local->transaction.main_frame = frame;
local->transaction.basename = AFR_BASENAME(oldloc->path);
local->transaction.new_basename = AFR_BASENAME(newloc->path);
- int_lock = &local->internal_lock;
-
- int_lock->lockee_count = nlockee = 0;
- ret = afr_init_entry_lockee(
- &int_lock->lockee[nlockee], local, &local->transaction.new_parent_loc,
- local->transaction.new_basename, priv->child_count);
- if (ret)
- goto out;
-
- nlockee++;
- ret = afr_init_entry_lockee(&int_lock->lockee[nlockee], local,
- &local->transaction.parent_loc,
- local->transaction.basename, priv->child_count);
- if (ret)
- goto out;
-
- nlockee++;
- if (local->newloc.inode && IA_ISDIR(local->newloc.inode->ia_type)) {
- ret = afr_init_entry_lockee(&int_lock->lockee[nlockee], local,
- &local->newloc, NULL, priv->child_count);
- if (ret)
- goto out;
-
- nlockee++;
- }
- qsort(int_lock->lockee, nlockee, sizeof(*int_lock->lockee),
- afr_entry_lockee_cmp);
- int_lock->lockee_count = nlockee;
-
ret = afr_transaction(transaction_frame, this,
AFR_ENTRY_RENAME_TRANSACTION);
if (ret < 0) {
@@ -1196,15 +1104,11 @@ int
afr_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag,
dict_t *xdata)
{
- afr_private_t *priv = NULL;
afr_local_t *local = NULL;
- afr_internal_lock_t *int_lock = NULL;
call_frame_t *transaction_frame = NULL;
int ret = -1;
int op_errno = ENOMEM;
- priv = this->private;
-
transaction_frame = copy_frame(frame);
if (!transaction_frame)
goto out;
@@ -1237,16 +1141,6 @@ afr_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag,
local->transaction.main_frame = frame;
local->transaction.basename = AFR_BASENAME(loc->path);
- int_lock = &local->internal_lock;
-
- int_lock->lockee_count = 0;
- ret = afr_init_entry_lockee(&int_lock->lockee[0], local,
- &local->transaction.parent_loc,
- local->transaction.basename, priv->child_count);
- if (ret)
- goto out;
-
- int_lock->lockee_count++;
ret = afr_transaction(transaction_frame, this, AFR_ENTRY_TRANSACTION);
if (ret < 0) {
op_errno = -ret;
@@ -1313,15 +1207,10 @@ int
afr_rmdir(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags,
dict_t *xdata)
{
- afr_private_t *priv = NULL;
afr_local_t *local = NULL;
- afr_internal_lock_t *int_lock = NULL;
call_frame_t *transaction_frame = NULL;
int ret = -1;
int op_errno = ENOMEM;
- int nlockee = 0;
-
- priv = this->private;
transaction_frame = copy_frame(frame);
if (!transaction_frame)
@@ -1355,26 +1244,6 @@ afr_rmdir(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags,
local->transaction.main_frame = frame;
local->transaction.basename = AFR_BASENAME(loc->path);
- int_lock = &local->internal_lock;
-
- int_lock->lockee_count = nlockee = 0;
- ret = afr_init_entry_lockee(&int_lock->lockee[nlockee], local,
- &local->transaction.parent_loc,
- local->transaction.basename, priv->child_count);
- if (ret)
- goto out;
-
- nlockee++;
- ret = afr_init_entry_lockee(&int_lock->lockee[nlockee], local, &local->loc,
- NULL, priv->child_count);
- if (ret)
- goto out;
-
- nlockee++;
- qsort(int_lock->lockee, nlockee, sizeof(*int_lock->lockee),
- afr_entry_lockee_cmp);
- int_lock->lockee_count = nlockee;
-
ret = afr_transaction(transaction_frame, this, AFR_ENTRY_TRANSACTION);
if (ret < 0) {
op_errno = -ret;
diff --git a/xlators/cluster/afr/src/afr-inode-read.c b/xlators/cluster/afr/src/afr-inode-read.c
index 03338592618..c5521704de2 100644
--- a/xlators/cluster/afr/src/afr-inode-read.c
+++ b/xlators/cluster/afr/src/afr-inode-read.c
@@ -15,20 +15,17 @@
#include <stdlib.h>
#include <signal.h>
-#include "glusterfs.h"
+#include <glusterfs/glusterfs.h>
#include "afr.h"
-#include "dict.h"
-#include "xlator.h"
-#include "hashfn.h"
-#include "logging.h"
-#include "list.h"
-#include "call-stub.h"
-#include "byte-order.h"
-#include "defaults.h"
-#include "common-utils.h"
-#include "compat-errno.h"
-#include "compat.h"
-#include "quota-common-utils.h"
+#include <glusterfs/dict.h>
+#include <glusterfs/logging.h>
+#include <glusterfs/list.h>
+#include <glusterfs/byte-order.h>
+#include <glusterfs/defaults.h>
+#include <glusterfs/common-utils.h>
+#include <glusterfs/compat-errno.h>
+#include <glusterfs/compat.h>
+#include <glusterfs/quota-common-utils.h>
#include "afr-transaction.h"
#include "afr-messages.h"
@@ -305,6 +302,7 @@ afr_fstat(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
afr_local_t *local = NULL;
int op_errno = 0;
+ AFR_ERROR_OUT_IF_FDCTX_INVALID(fd, this, op_errno, out);
local = AFR_FRAME_INIT(frame, op_errno);
if (!local)
goto out;
@@ -531,12 +529,16 @@ afr_fgetxattr_clrlk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int32_t callcnt = 0;
long int cky = 0;
int ret = 0;
+ int keylen = 0;
+ int children_keylen = 0;
priv = this->private;
children = priv->children;
local = frame->local;
cky = (long)cookie;
+ keylen = strlen(local->cont.getxattr.name);
+ children_keylen = strlen(children[cky]->name);
LOCK(&frame->lock);
{
@@ -547,11 +549,12 @@ afr_fgetxattr_clrlk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
if (!local->dict)
local->dict = dict_new();
if (local->dict) {
- ret = dict_get_str(dict, local->cont.getxattr.name, &tmp_report);
+ ret = dict_get_strn(dict, local->cont.getxattr.name, keylen,
+ &tmp_report);
if (ret)
goto unlock;
- ret = dict_set_dynstr(local->dict, children[cky]->name,
- gf_strdup(tmp_report));
+ ret = dict_set_dynstrn(local->dict, children[cky]->name,
+ children_keylen, gf_strdup(tmp_report));
if (ret)
goto unlock;
}
@@ -575,8 +578,8 @@ unlock:
}
if (serz_len == -1)
snprintf(lk_summary, sizeof(lk_summary), "No locks cleared.");
- ret = dict_set_dynstr(xattr, local->cont.getxattr.name,
- gf_strdup(lk_summary));
+ ret = dict_set_dynstrn(xattr, local->cont.getxattr.name, keylen,
+ gf_strdup(lk_summary));
if (ret) {
op_ret = -1;
op_errno = ENOMEM;
@@ -613,6 +616,8 @@ afr_getxattr_clrlk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int32_t callcnt = 0;
long int cky = 0;
int ret = 0;
+ int keylen = 0;
+ int children_keylen = 0;
priv = this->private;
children = priv->children;
@@ -620,6 +625,9 @@ afr_getxattr_clrlk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
local = frame->local;
cky = (long)cookie;
+ keylen = strlen(local->cont.getxattr.name);
+ children_keylen = strlen(children[cky]->name);
+
LOCK(&frame->lock);
{
callcnt = --local->call_count;
@@ -629,11 +637,12 @@ afr_getxattr_clrlk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
if (!local->dict)
local->dict = dict_new();
if (local->dict) {
- ret = dict_get_str(dict, local->cont.getxattr.name, &tmp_report);
+ ret = dict_get_strn(dict, local->cont.getxattr.name, keylen,
+ &tmp_report);
if (ret)
goto unlock;
- ret = dict_set_dynstr(local->dict, children[cky]->name,
- gf_strdup(tmp_report));
+ ret = dict_set_dynstrn(local->dict, children[cky]->name,
+ children_keylen, gf_strdup(tmp_report));
if (ret)
goto unlock;
}
@@ -657,8 +666,8 @@ unlock:
}
if (serz_len == -1)
snprintf(lk_summary, sizeof(lk_summary), "No locks cleared.");
- ret = dict_set_dynstr(xattr, local->cont.getxattr.name,
- gf_strdup(lk_summary));
+ ret = dict_set_dynstrn(xattr, local->cont.getxattr.name, keylen,
+ gf_strdup(lk_summary));
if (ret) {
op_ret = -1;
op_errno = ENOMEM;
@@ -802,15 +811,18 @@ unlock:
if (ret) {
local->op_ret = -1;
local->op_errno = ENOMEM;
+ GF_FREE(xattr_serz);
goto unwind;
}
- ret = dict_set_dynstr(local->dict, GF_XATTR_LIST_NODE_UUIDS_KEY,
- xattr_serz);
+ ret = dict_set_dynstr_sizen(local->dict, GF_XATTR_LIST_NODE_UUIDS_KEY,
+ xattr_serz);
if (ret) {
gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_DICT_SET_FAILED,
"Cannot set node_uuid key in dict");
local->op_ret = -1;
local->op_errno = ENOMEM;
+ if (ret == -EINVAL)
+ GF_FREE(xattr_serz);
} else {
local->op_ret = local->cont.getxattr.xattr_len - 1;
local->op_errno = 0;
@@ -934,24 +946,13 @@ unlock:
goto unwind;
}
- len = dict_serialized_length(local->dict);
- if (len <= 0) {
- goto unwind;
- }
-
- lockinfo_buf = GF_CALLOC(1, len, gf_common_mt_char);
- if (!lockinfo_buf) {
+ op_ret = dict_allocate_and_serialize(
+ local->dict, (char **)&lockinfo_buf, (unsigned int *)&len);
+ if (op_ret != 0) {
local->op_ret = -1;
- local->op_errno = ENOMEM;
goto unwind;
}
- op_ret = dict_serialize(local->dict, lockinfo_buf);
- if (op_ret < 0) {
- local->op_ret = -1;
- local->op_errno = -op_ret;
- }
-
op_ret = dict_set_dynptr(newdict, GF_XATTR_LOCKINFO_KEY,
(void *)lockinfo_buf, len);
if (op_ret < 0) {
@@ -1050,24 +1051,13 @@ unlock:
goto unwind;
}
- len = dict_serialized_length(local->dict);
- if (len <= 0) {
- goto unwind;
- }
-
- lockinfo_buf = GF_CALLOC(1, len, gf_common_mt_char);
- if (!lockinfo_buf) {
+ op_ret = dict_allocate_and_serialize(
+ local->dict, (char **)&lockinfo_buf, (unsigned int *)&len);
+ if (op_ret != 0) {
local->op_ret = -1;
- local->op_errno = ENOMEM;
goto unwind;
}
- op_ret = dict_serialize(local->dict, lockinfo_buf);
- if (op_ret < 0) {
- local->op_ret = -1;
- local->op_errno = -op_ret;
- }
-
op_ret = dict_set_dynptr(newdict, GF_XATTR_LOCKINFO_KEY,
(void *)lockinfo_buf, len);
if (op_ret < 0) {
@@ -1096,9 +1086,11 @@ afr_fgetxattr_pathinfo_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int ret = 0;
char *xattr = NULL;
char *xattr_serz = NULL;
+ int keylen = 0;
char xattr_cky[1024] = {
0,
};
+ int xattr_cky_len = 0;
dict_t *nxattr = NULL;
long cky = 0;
int32_t padding = 0;
@@ -1111,7 +1103,9 @@ afr_fgetxattr_pathinfo_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
local = frame->local;
cky = (long)cookie;
-
+ keylen = strlen(local->cont.getxattr.name);
+ xattr_cky_len = snprintf(xattr_cky, sizeof(xattr_cky), "%s-%ld",
+ local->cont.getxattr.name, cky);
LOCK(&frame->lock);
{
callcnt = --local->call_count;
@@ -1127,31 +1121,30 @@ afr_fgetxattr_pathinfo_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
if (!dict || (op_ret < 0))
goto unlock;
- if (!local->dict)
+ if (!local->dict) {
local->dict = dict_new();
-
- if (local->dict) {
- ret = dict_get_str(dict, local->cont.getxattr.name, &xattr);
- if (ret)
+ if (!local->dict)
goto unlock;
+ }
+ ret = dict_get_strn(dict, local->cont.getxattr.name, keylen, &xattr);
+ if (ret)
+ goto unlock;
- xattr = gf_strdup(xattr);
-
- (void)snprintf(xattr_cky, sizeof(xattr_cky), "%s-%ld",
- local->cont.getxattr.name, cky);
- ret = dict_set_dynstr(local->dict, xattr_cky, xattr);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_DICT_SET_FAILED,
- "Cannot set xattr cookie key");
- goto unlock;
- }
+ xattr = gf_strdup(xattr);
- local->cont.getxattr.xattr_len += strlen(xattr) + 1;
+ ret = dict_set_dynstrn(local->dict, xattr_cky, xattr_cky_len, xattr);
+ if (ret) {
+ UNLOCK(&frame->lock);
+ gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_DICT_SET_FAILED,
+ "Cannot set xattr cookie key");
+ goto post_unlock;
}
+
+ local->cont.getxattr.xattr_len += strlen(xattr) + 1;
}
unlock:
UNLOCK(&frame->lock);
-
+post_unlock:
if (!callcnt) {
if (!local->cont.getxattr.xattr_len)
goto unwind;
@@ -1178,6 +1171,7 @@ unlock:
ret = dict_serialize_value_with_delim(
local->dict, xattr_serz + xattr_serz_len, &tlen, ' ');
if (ret) {
+ GF_FREE(xattr_serz);
goto unwind;
}
@@ -1185,10 +1179,14 @@ unlock:
*(xattr_serz + padding + tlen) = ')';
*(xattr_serz + padding + tlen + 1) = '\0';
- ret = dict_set_dynstr(nxattr, local->cont.getxattr.name, xattr_serz);
- if (ret)
+ ret = dict_set_dynstrn(nxattr, local->cont.getxattr.name, keylen,
+ xattr_serz);
+ if (ret) {
gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_DICT_SET_FAILED,
"Cannot set pathinfo key in dict");
+ if (ret == -EINVAL)
+ GF_FREE(xattr_serz);
+ }
unwind:
AFR_STACK_UNWIND(fgetxattr, frame, local->op_ret, local->op_errno,
@@ -1215,6 +1213,8 @@ afr_getxattr_pathinfo_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
char xattr_cky[1024] = {
0,
};
+ int keylen = 0;
+ int xattr_cky_len = 0;
dict_t *nxattr = NULL;
long cky = 0;
int32_t padding = 0;
@@ -1227,7 +1227,9 @@ afr_getxattr_pathinfo_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
local = frame->local;
cky = (long)cookie;
-
+ keylen = strlen(local->cont.getxattr.name);
+ xattr_cky_len = snprintf(xattr_cky, sizeof(xattr_cky), "%s-%ld",
+ local->cont.getxattr.name, cky);
LOCK(&frame->lock);
{
callcnt = --local->call_count;
@@ -1243,32 +1245,30 @@ afr_getxattr_pathinfo_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
if (!dict || (op_ret < 0))
goto unlock;
- if (!local->dict)
+ if (!local->dict) {
local->dict = dict_new();
-
- if (local->dict) {
- ret = dict_get_str(dict, local->cont.getxattr.name, &xattr);
- if (ret)
+ if (!local->dict)
goto unlock;
+ }
+ ret = dict_get_strn(dict, local->cont.getxattr.name, keylen, &xattr);
+ if (ret)
+ goto unlock;
- xattr = gf_strdup(xattr);
-
- (void)snprintf(xattr_cky, 1024, "%s-%ld", local->cont.getxattr.name,
- cky);
- ret = dict_set_dynstr(local->dict, xattr_cky, xattr);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_DICT_SET_FAILED,
- "Cannot set xattr "
- "cookie key");
- goto unlock;
- }
+ xattr = gf_strdup(xattr);
- local->cont.getxattr.xattr_len += strlen(xattr) + 1;
+ ret = dict_set_dynstrn(local->dict, xattr_cky, xattr_cky_len, xattr);
+ if (ret) {
+ UNLOCK(&frame->lock);
+ gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_DICT_SET_FAILED,
+ "Cannot set xattr cookie key");
+ goto post_unlock;
}
+
+ local->cont.getxattr.xattr_len += strlen(xattr) + 1;
}
unlock:
UNLOCK(&frame->lock);
-
+post_unlock:
if (!callcnt) {
if (!local->cont.getxattr.xattr_len)
goto unwind;
@@ -1295,6 +1295,7 @@ unlock:
ret = dict_serialize_value_with_delim(
local->dict, xattr_serz + xattr_serz_len, &tlen, ' ');
if (ret) {
+ GF_FREE(xattr_serz);
goto unwind;
}
@@ -1302,10 +1303,14 @@ unlock:
*(xattr_serz + padding + tlen) = ')';
*(xattr_serz + padding + tlen + 1) = '\0';
- ret = dict_set_dynstr(nxattr, local->cont.getxattr.name, xattr_serz);
- if (ret)
+ ret = dict_set_dynstrn(nxattr, local->cont.getxattr.name, keylen,
+ xattr_serz);
+ if (ret) {
gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_DICT_SET_FAILED,
"Cannot set pathinfo key in dict");
+ if (ret == -EINVAL)
+ GF_FREE(xattr_serz);
+ }
unwind:
AFR_STACK_UNWIND(getxattr, frame, local->op_ret, local->op_errno,
@@ -1694,6 +1699,7 @@ afr_fgetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, const char *name,
int32_t op_errno = 0;
fop_fgetxattr_cbk_t cbk = NULL;
+ AFR_ERROR_OUT_IF_FDCTX_INVALID(fd, this, op_errno, out);
local = AFR_FRAME_INIT(frame, op_errno);
if (!local)
goto out;
@@ -1787,6 +1793,7 @@ afr_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
afr_local_t *local = NULL;
int32_t op_errno = 0;
+ AFR_ERROR_OUT_IF_FDCTX_INVALID(fd, this, op_errno, out);
local = AFR_FRAME_INIT(frame, op_errno);
if (!local)
goto out;
@@ -1862,6 +1869,7 @@ afr_seek(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
afr_local_t *local = NULL;
int32_t op_errno = 0;
+ AFR_ERROR_OUT_IF_FDCTX_INVALID(fd, this, op_errno, out);
local = AFR_FRAME_INIT(frame, op_errno);
if (!local)
goto out;
diff --git a/xlators/cluster/afr/src/afr-inode-read.h b/xlators/cluster/afr/src/afr-inode-read.h
index 1627ee2c426..8c982bc7e6f 100644
--- a/xlators/cluster/afr/src/afr-inode-read.h
+++ b/xlators/cluster/afr/src/afr-inode-read.h
@@ -38,5 +38,8 @@ afr_fgetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, const char *name,
dict_t *xdata);
int
+afr_seek(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ gf_seek_what_t what, dict_t *xdata);
+int
afr_handle_quota_size(call_frame_t *frame, xlator_t *this);
#endif /* __INODE_READ_H__ */
diff --git a/xlators/cluster/afr/src/afr-inode-write.c b/xlators/cluster/afr/src/afr-inode-write.c
index c6fe0939841..1d6e4f3570a 100644
--- a/xlators/cluster/afr/src/afr-inode-write.c
+++ b/xlators/cluster/afr/src/afr-inode-write.c
@@ -8,27 +8,21 @@
cases as published by the Free Software Foundation.
*/
-#include <libgen.h>
#include <unistd.h>
-#include <fnmatch.h>
#include <sys/time.h>
#include <stdlib.h>
#include <signal.h>
-#include "glusterfs.h"
+#include <glusterfs/glusterfs.h>
#include "afr.h"
-#include "dict.h"
-#include "xlator.h"
-#include "hashfn.h"
-#include "logging.h"
-#include "list.h"
-#include "call-stub.h"
-#include "defaults.h"
-#include "common-utils.h"
-#include "compat-errno.h"
-#include "compat.h"
+#include <glusterfs/dict.h>
+#include <glusterfs/logging.h>
+#include <glusterfs/defaults.h>
+#include <glusterfs/common-utils.h>
+#include <glusterfs/compat-errno.h>
+#include <glusterfs/compat.h>
#include "protocol-common.h"
-#include "byte-order.h"
+#include <glusterfs/byte-order.h>
#include "afr-transaction.h"
#include "afr-self-heal.h"
#include "afr-messages.h"
@@ -180,11 +174,10 @@ __afr_inode_write_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
{
__afr_inode_write_fill(frame, this, child_index, op_ret, op_errno,
prebuf, postbuf, xattr, xdata);
+ call_count = --local->call_count;
}
UNLOCK(&frame->lock);
- call_count = afr_frame_return(frame);
-
if (call_count == 0) {
__afr_inode_write_finalize(frame, this);
@@ -498,6 +491,7 @@ afr_writev(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *vector,
int op_errno = ENOMEM;
int ret = -1;
+ AFR_ERROR_OUT_IF_FDCTX_INVALID(fd, this, op_errno, out);
local = AFR_FRAME_INIT(frame, op_errno);
if (!local)
goto out;
@@ -737,6 +731,7 @@ afr_ftruncate(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
int ret = -1;
int op_errno = ENOMEM;
+ AFR_ERROR_OUT_IF_FDCTX_INVALID(fd, this, op_errno, out);
transaction_frame = copy_frame(frame);
if (!transaction_frame)
goto out;
@@ -947,6 +942,7 @@ afr_fsetattr(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iatt *buf,
int ret = -1;
int op_errno = ENOMEM;
+ AFR_ERROR_OUT_IF_FDCTX_INVALID(fd, this, op_errno, out);
transaction_frame = copy_frame(frame);
if (!transaction_frame)
goto out;
@@ -1059,15 +1055,14 @@ afr_emptyb_set_pending_changelog_cbk(call_frame_t *frame, void *cookie,
local->replies[i].op_ret = op_ret;
local->replies[i].op_errno = op_errno;
- ret = dict_get_str(local->xdata_req, "replicate-brick-op", &op_type);
+ ret = dict_get_str_sizen(local->xdata_req, "replicate-brick-op", &op_type);
if (ret)
goto out;
- gf_msg(this->name, op_ret ? GF_LOG_ERROR : GF_LOG_INFO,
- op_ret ? op_errno : 0, afr_get_msg_id(op_type),
- "Set of pending xattr %s on"
- " %s.",
- op_ret ? "failed" : "succeeded", priv->children[i]->name);
+ gf_smsg(this->name, op_ret ? GF_LOG_ERROR : GF_LOG_INFO,
+ op_ret ? op_errno : 0, AFR_MSG_SET_PEND_XATTR, "name=%s",
+ priv->children[i]->name, "op_ret=%s",
+ op_ret ? "failed" : "succeeded", NULL);
out:
syncbarrier_wake(&local->barrier);
@@ -1105,10 +1100,10 @@ out:
return -ret;
}
-int
+static int
_afr_handle_empty_brick_type(xlator_t *this, call_frame_t *frame, loc_t *loc,
int empty_index, afr_transaction_type type,
- char *op_type)
+ char *op_type, const int op_type_len)
{
int count = 0;
int ret = -ENOMEM;
@@ -1139,7 +1134,8 @@ _afr_handle_empty_brick_type(xlator_t *this, call_frame_t *frame, loc_t *loc,
if (!local->xdata_req)
goto out;
- ret = dict_set_str(local->xdata_req, "replicate-brick-op", op_type);
+ ret = dict_set_nstrn(local->xdata_req, "replicate-brick-op",
+ SLEN("replicate-brick-op"), op_type, op_type_len);
if (ret)
goto out;
@@ -1160,9 +1156,8 @@ _afr_handle_empty_brick_type(xlator_t *this, call_frame_t *frame, loc_t *loc,
}
if (!count) {
- gf_msg(this->name, GF_LOG_ERROR, EAGAIN, AFR_MSG_REPLACE_BRICK_STATUS,
- "Couldn't acquire lock on"
- " any child.");
+ gf_smsg(this->name, GF_LOG_ERROR, EAGAIN, AFR_MSG_REPLACE_BRICK_STATUS,
+ NULL);
ret = -EAGAIN;
goto unlock;
}
@@ -1211,26 +1206,41 @@ _afr_handle_empty_brick(void *opaque)
call_frame_t *frame = NULL;
xlator_t *this = NULL;
char *op_type = NULL;
+ int op_type_len = 0;
afr_empty_brick_args_t *data = NULL;
+ call_frame_t *op_frame = NULL;
data = opaque;
frame = data->frame;
empty_index = data->empty_index;
+ if (!data->op_type)
+ goto out;
+
+ op_frame = copy_frame(frame);
+ if (!op_frame) {
+ ret = -1;
+ op_errno = ENOMEM;
+ goto out;
+ }
+
op_type = data->op_type;
- this = frame->this;
+ op_type_len = strlen(op_type);
+ this = op_frame->this;
priv = this->private;
- local = AFR_FRAME_INIT(frame, op_errno);
+ afr_set_lk_owner(op_frame, this, op_frame->root);
+ local = AFR_FRAME_INIT(op_frame, op_errno);
if (!local)
goto out;
loc_copy(&local->loc, &data->loc);
- gf_msg(this->name, GF_LOG_INFO, 0, 0, "New brick is : %s",
- priv->children[empty_index]->name);
+ gf_smsg(this->name, GF_LOG_INFO, 0, AFR_MSG_NEW_BRICK, "name=%s",
+ priv->children[empty_index]->name, NULL);
- ret = _afr_handle_empty_brick_type(this, frame, &local->loc, empty_index,
- AFR_METADATA_TRANSACTION, op_type);
+ ret = _afr_handle_empty_brick_type(this, op_frame, &local->loc, empty_index,
+ AFR_METADATA_TRANSACTION, op_type,
+ op_type_len);
if (ret) {
op_errno = -ret;
ret = -1;
@@ -1244,8 +1254,9 @@ _afr_handle_empty_brick(void *opaque)
local->xattr_req = NULL;
local->xdata_req = NULL;
- ret = _afr_handle_empty_brick_type(this, frame, &local->loc, empty_index,
- AFR_ENTRY_TRANSACTION, op_type);
+ ret = _afr_handle_empty_brick_type(this, op_frame, &local->loc, empty_index,
+ AFR_ENTRY_TRANSACTION, op_type,
+ op_type_len);
if (ret) {
op_errno = -ret;
ret = -1;
@@ -1253,6 +1264,9 @@ _afr_handle_empty_brick(void *opaque)
}
ret = 0;
out:
+ if (op_frame) {
+ AFR_STACK_DESTROY(op_frame);
+ }
AFR_STACK_UNWIND(setxattr, frame, ret, op_errno, NULL);
return 0;
}
@@ -1273,14 +1287,14 @@ afr_split_brain_resolve_do(call_frame_t *frame, xlator_t *this, loc_t *loc,
goto out;
}
- ret = dict_set_int32(local->xdata_req, "heal-op",
- GF_SHD_OP_SBRAIN_HEAL_FROM_BRICK);
+ ret = dict_set_int32_sizen(local->xdata_req, "heal-op",
+ GF_SHD_OP_SBRAIN_HEAL_FROM_BRICK);
if (ret) {
op_errno = -ret;
ret = -1;
goto out;
}
- ret = dict_set_str(local->xdata_req, "child-name", data);
+ ret = dict_set_str_sizen(local->xdata_req, "child-name", data);
if (ret) {
op_errno = -ret;
ret = -1;
@@ -1297,9 +1311,8 @@ afr_split_brain_resolve_do(call_frame_t *frame, xlator_t *this, loc_t *loc,
*/
ret = afr_inode_split_brain_choice_set(loc->inode, this, -1);
if (ret)
- gf_msg(this->name, GF_LOG_WARNING, 0, AFR_MSG_SPLIT_BRAIN_CHOICE_ERROR,
- "Failed to set"
- "split-brain choice to -1");
+ gf_smsg(this->name, GF_LOG_WARNING, 0, AFR_MSG_SPLIT_BRAIN_SET_FAILED,
+ NULL);
afr_heal_splitbrain_file(frame, this, loc);
ret = 0;
out:
@@ -1322,8 +1335,8 @@ afr_get_split_brain_child_index(xlator_t *this, void *value, size_t len)
spb_child_index = afr_get_child_index_from_name(this, spb_child_str);
if (spb_child_index < 0) {
- gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_INVALID_SUBVOL,
- "Invalid subvol: %s", spb_child_str);
+ gf_smsg(this->name, GF_LOG_ERROR, 0, AFR_MSG_INVALID_SUBVOL,
+ "subvol=%s", spb_child_str, NULL);
}
return spb_child_index;
}
@@ -1345,11 +1358,9 @@ afr_can_set_split_brain_choice(void *opaque)
&data->m_spb);
if (ret)
- gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_SPLIT_BRAIN_CHOICE_ERROR,
- "Failed to determine if %s"
- " is in split-brain. "
- "Aborting split-brain-choice set.",
- uuid_utoa(loc->gfid));
+ gf_smsg(this->name, GF_LOG_ERROR, 0,
+ AFR_MSG_SPLIT_BRAIN_DETERMINE_FAILED, "gfid=%s",
+ uuid_utoa(loc->gfid), NULL);
return ret;
}
@@ -1357,7 +1368,8 @@ int
afr_handle_split_brain_commands(xlator_t *this, call_frame_t *frame, loc_t *loc,
dict_t *dict)
{
- void *value = NULL;
+ void *choice_value = NULL;
+ void *resolve_value = NULL;
afr_private_t *priv = NULL;
afr_local_t *local = NULL;
afr_spbc_timeout_t *data = NULL;
@@ -1368,6 +1380,14 @@ afr_handle_split_brain_commands(xlator_t *this, call_frame_t *frame, loc_t *loc,
priv = this->private;
+ ret = dict_get_ptr_and_len(dict, GF_AFR_SBRAIN_CHOICE, &choice_value, &len);
+ ret = dict_get_ptr_and_len(dict, GF_AFR_SBRAIN_RESOLVE, &resolve_value,
+ &len);
+ if (!choice_value && !resolve_value) {
+ ret = -1;
+ goto out;
+ }
+
local = AFR_FRAME_INIT(frame, op_errno);
if (!local) {
ret = 1;
@@ -1376,9 +1396,9 @@ afr_handle_split_brain_commands(xlator_t *this, call_frame_t *frame, loc_t *loc,
local->op = GF_FOP_SETXATTR;
- ret = dict_get_ptr_and_len(dict, GF_AFR_SBRAIN_CHOICE, &value, &len);
- if (value) {
- spb_child_index = afr_get_split_brain_child_index(this, value, len);
+ if (choice_value) {
+ spb_child_index = afr_get_split_brain_child_index(this, choice_value,
+ len);
if (spb_child_index < 0) {
/* Case where value was "none" */
if (spb_child_index == -2)
@@ -1402,12 +1422,8 @@ afr_handle_split_brain_commands(xlator_t *this, call_frame_t *frame, loc_t *loc,
ret = synctask_new(this->ctx->env, afr_can_set_split_brain_choice,
afr_set_split_brain_choice, NULL, data);
if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0,
- AFR_MSG_SPLIT_BRAIN_CHOICE_ERROR,
- "Failed to create"
- " synctask. Aborting split-brain choice set"
- " for %s",
- loc->name);
+ gf_smsg(this->name, GF_LOG_ERROR, 0, AFR_MSG_SPLIT_BRAIN_STATUS,
+ "name=%s", loc->name, NULL);
ret = 1;
op_errno = ENOMEM;
goto out;
@@ -1416,9 +1432,9 @@ afr_handle_split_brain_commands(xlator_t *this, call_frame_t *frame, loc_t *loc,
goto out;
}
- ret = dict_get_ptr_and_len(dict, GF_AFR_SBRAIN_RESOLVE, &value, &len);
- if (value) {
- spb_child_index = afr_get_split_brain_child_index(this, value, len);
+ if (resolve_value) {
+ spb_child_index = afr_get_split_brain_child_index(this, resolve_value,
+ len);
if (spb_child_index < 0) {
ret = 1;
goto out;
@@ -1470,20 +1486,20 @@ afr_handle_empty_brick(xlator_t *this, call_frame_t *frame, loc_t *loc,
char *op_type = NULL;
afr_empty_brick_args_t *data = NULL;
- ret = dict_get_str(dict, GF_AFR_REPLACE_BRICK, &empty_brick);
+ ret = dict_get_str_sizen(dict, GF_AFR_REPLACE_BRICK, &empty_brick);
if (!ret)
op_type = GF_AFR_REPLACE_BRICK;
- ab_ret = dict_get_str(dict, GF_AFR_ADD_BRICK, &empty_brick);
+ ab_ret = dict_get_str_sizen(dict, GF_AFR_ADD_BRICK, &empty_brick);
if (!ab_ret)
op_type = GF_AFR_ADD_BRICK;
if (ret && ab_ret)
goto out;
- if (frame->root->pid != GF_CLIENT_PID_SELF_HEALD) {
- gf_msg(this->name, GF_LOG_ERROR, EPERM, afr_get_msg_id(op_type),
- "'%s' is an internal extended attribute.", op_type);
+ if (frame->root->pid != GF_CLIENT_PID_ADD_REPLICA_MOUNT) {
+ gf_smsg(this->name, GF_LOG_ERROR, EPERM, AFR_MSG_INTERNAL_ATTR,
+ "op_type=%s", op_type, NULL);
ret = 1;
goto out;
}
@@ -1509,8 +1525,8 @@ afr_handle_empty_brick(xlator_t *this, call_frame_t *frame, loc_t *loc,
ret = synctask_new(this->ctx->env, _afr_handle_empty_brick,
_afr_handle_empty_brick_cbk, NULL, data);
if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, afr_get_msg_id(op_type),
- "Failed to create synctask.");
+ gf_smsg(this->name, GF_LOG_ERROR, 0, AFR_MSG_SPLIT_BRAIN_STATUS,
+ NULL);
ret = 1;
op_errno = ENOMEM;
afr_brick_args_cleanup(data);
@@ -1668,6 +1684,7 @@ afr_fsetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict,
GF_IF_INTERNAL_XATTR_GOTO("trusted.glusterfs.afr.*", dict, op_errno, out);
+ AFR_ERROR_OUT_IF_FDCTX_INVALID(fd, this, op_errno, out);
transaction_frame = copy_frame(frame);
if (!transaction_frame)
goto out;
@@ -1876,6 +1893,7 @@ afr_fremovexattr(call_frame_t *frame, xlator_t *this, fd_t *fd,
GF_IF_NATIVE_XATTR_GOTO("trusted.glusterfs.afr.*", name, op_errno, out);
+ AFR_ERROR_OUT_IF_FDCTX_INVALID(fd, this, op_errno, out);
transaction_frame = copy_frame(frame);
if (!transaction_frame)
goto out;
@@ -1976,6 +1994,7 @@ afr_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t mode,
int ret = -1;
int op_errno = ENOMEM;
+ AFR_ERROR_OUT_IF_FDCTX_INVALID(fd, this, op_errno, out);
transaction_frame = copy_frame(frame);
if (!transaction_frame)
goto out;
@@ -2085,6 +2104,7 @@ afr_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
int ret = -1;
int op_errno = ENOMEM;
+ AFR_ERROR_OUT_IF_FDCTX_INVALID(fd, this, op_errno, out);
transaction_frame = copy_frame(frame);
if (!transaction_frame)
goto out;
@@ -2191,6 +2211,7 @@ afr_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
int ret = -1;
int op_errno = ENOMEM;
+ AFR_ERROR_OUT_IF_FDCTX_INVALID(fd, this, op_errno, out);
transaction_frame = copy_frame(frame);
if (!transaction_frame)
goto out;
@@ -2222,7 +2243,7 @@ afr_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
local->transaction.main_frame = frame;
- local->transaction.start = local->cont.discard.offset;
+ local->transaction.start = local->cont.zerofill.offset;
local->transaction.len = len;
afr_fix_open(fd, this);
@@ -2390,6 +2411,7 @@ afr_fxattrop(call_frame_t *frame, xlator_t *this, fd_t *fd,
int ret = -1;
int op_errno = ENOMEM;
+ AFR_ERROR_OUT_IF_FDCTX_INVALID(fd, this, op_errno, out);
transaction_frame = copy_frame(frame);
if (!transaction_frame)
goto out;
@@ -2484,7 +2506,9 @@ afr_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync,
call_frame_t *transaction_frame = NULL;
int ret = -1;
int32_t op_errno = ENOMEM;
+ int8_t last_fsync = 0;
+ AFR_ERROR_OUT_IF_FDCTX_INVALID(fd, this, op_errno, out);
transaction_frame = copy_frame(frame);
if (!transaction_frame)
goto out;
@@ -2493,10 +2517,16 @@ afr_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync,
if (!local)
goto out;
- if (xdata)
+ if (xdata) {
local->xdata_req = dict_copy_with_ref(xdata, NULL);
- else
+ if (dict_get_int8(xdata, "last-fsync", &last_fsync) == 0) {
+ if (last_fsync) {
+ local->transaction.disable_delayed_post_op = _gf_true;
+ }
+ }
+ } else {
local->xdata_req = dict_new();
+ }
if (!local->xdata_req)
goto out;
diff --git a/xlators/cluster/afr/src/afr-lk-common.c b/xlators/cluster/afr/src/afr-lk-common.c
index 95e52ff4a09..bc8eabe0f43 100644
--- a/xlators/cluster/afr/src/afr-lk-common.c
+++ b/xlators/cluster/afr/src/afr-lk-common.c
@@ -8,9 +8,9 @@
cases as published by the Free Software Foundation.
*/
-#include "dict.h"
-#include "byte-order.h"
-#include "common-utils.h"
+#include <glusterfs/dict.h>
+#include <glusterfs/byte-order.h>
+#include <glusterfs/common-utils.h>
#include "afr.h"
#include "afr-transaction.h"
@@ -22,11 +22,40 @@
#define LOCKED_YES 0x1 /* for DATA, METADATA, ENTRY and higher_path */
#define LOCKED_LOWER 0x2 /* for lower path */
+void
+afr_lockee_cleanup(afr_lockee_t *lockee)
+{
+ if (lockee->fd) {
+ fd_unref(lockee->fd);
+ lockee->fd = NULL;
+ } else {
+ loc_wipe(&lockee->loc);
+ }
+
+ GF_FREE(lockee->basename);
+ lockee->basename = NULL;
+ GF_FREE(lockee->locked_nodes);
+ lockee->locked_nodes = NULL;
+
+ return;
+}
+
+void
+afr_lockees_cleanup(afr_internal_lock_t *int_lock)
+{
+ int i = 0;
+
+ for (i = 0; i < int_lock->lockee_count; i++) {
+ afr_lockee_cleanup(&int_lock->lockee[i]);
+ }
+
+ return;
+}
int
afr_entry_lockee_cmp(const void *l1, const void *l2)
{
- const afr_entry_lockee_t *r1 = l1;
- const afr_entry_lockee_t *r2 = l2;
+ const afr_lockee_t *r1 = l1;
+ const afr_lockee_t *r2 = l2;
int ret = 0;
uuid_t gfid1 = {0};
uuid_t gfid2 = {0};
@@ -81,31 +110,14 @@ internal_lock_count(call_frame_t *frame, xlator_t *this)
}
int
-afr_is_inodelk_transaction(afr_transaction_type type)
-{
- int ret = 0;
-
- switch (type) {
- case AFR_DATA_TRANSACTION:
- case AFR_METADATA_TRANSACTION:
- ret = 1;
- break;
-
- case AFR_ENTRY_RENAME_TRANSACTION:
- case AFR_ENTRY_TRANSACTION:
- ret = 0;
- break;
- }
-
- return ret;
-}
-
-int
-afr_init_entry_lockee(afr_entry_lockee_t *lockee, afr_local_t *local,
- loc_t *loc, char *basename, int child_count)
+afr_add_entry_lockee(afr_local_t *local, loc_t *loc, char *basename,
+ int child_count)
{
- int ret = -1;
+ int ret = -ENOMEM;
+ afr_internal_lock_t *int_lock = &local->internal_lock;
+ afr_lockee_t *lockee = &int_lock->lockee[int_lock->lockee_count];
+ GF_ASSERT(int_lock->lockee_count < AFR_LOCKEE_COUNT_MAX);
loc_copy(&lockee->loc, loc);
lockee->basename = (basename) ? gf_strdup(basename) : NULL;
if (basename && !lockee->basename)
@@ -119,28 +131,45 @@ afr_init_entry_lockee(afr_entry_lockee_t *lockee, afr_local_t *local,
goto out;
ret = 0;
+ int_lock->lockee_count++;
out:
+ if (ret) {
+ afr_lockee_cleanup(lockee);
+ }
return ret;
}
-void
-afr_entry_lockee_cleanup(afr_internal_lock_t *int_lock)
+int
+afr_add_inode_lockee(afr_local_t *local, int child_count)
{
- int i = 0;
+ int ret = -ENOMEM;
+ afr_internal_lock_t *int_lock = &local->internal_lock;
+ afr_lockee_t *lockee = &int_lock->lockee[int_lock->lockee_count];
- for (i = 0; i < int_lock->lockee_count; i++) {
- loc_wipe(&int_lock->lockee[i].loc);
- if (int_lock->lockee[i].basename)
- GF_FREE(int_lock->lockee[i].basename);
- if (int_lock->lockee[i].locked_nodes)
- GF_FREE(int_lock->lockee[i].locked_nodes);
+ if (local->fd) {
+ lockee->fd = fd_ref(local->fd);
+ } else {
+ loc_copy(&lockee->loc, &local->loc);
}
- return;
+ lockee->locked_count = 0;
+ lockee->locked_nodes = GF_CALLOC(child_count, sizeof(*lockee->locked_nodes),
+ gf_afr_mt_afr_node_character);
+
+ if (!lockee->locked_nodes)
+ goto out;
+
+ ret = 0;
+ int_lock->lockee_count++;
+out:
+ if (ret) {
+ afr_lockee_cleanup(lockee);
+ }
+ return ret;
}
static int
-initialize_entrylk_variables(call_frame_t *frame, xlator_t *this)
+initialize_internal_lock_variables(call_frame_t *frame, xlator_t *this)
{
afr_local_t *local = NULL;
afr_internal_lock_t *int_lock = NULL;
@@ -152,9 +181,10 @@ initialize_entrylk_variables(call_frame_t *frame, xlator_t *this)
local = frame->local;
int_lock = &local->internal_lock;
- int_lock->entrylk_lock_count = 0;
+ int_lock->lock_count = 0;
int_lock->lock_op_ret = -1;
int_lock->lock_op_errno = 0;
+ int_lock->lk_attempted_count = 0;
for (i = 0; i < AFR_LOCKEE_COUNT_MAX; i++) {
if (!int_lock->lockee[i].locked_nodes)
@@ -167,28 +197,6 @@ initialize_entrylk_variables(call_frame_t *frame, xlator_t *this)
return 0;
}
-static int
-initialize_inodelk_variables(call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
- afr_internal_lock_t *int_lock = NULL;
- afr_private_t *priv = NULL;
-
- priv = this->private;
- local = frame->local;
- int_lock = &local->internal_lock;
-
- int_lock->lock_count = 0;
- int_lock->lk_attempted_count = 0;
- int_lock->lock_op_ret = -1;
- int_lock->lock_op_errno = 0;
-
- memset(int_lock->locked_nodes, 0,
- sizeof(*int_lock->locked_nodes) * priv->child_count);
-
- return 0;
-}
-
int
afr_lockee_locked_nodes_count(afr_internal_lock_t *int_lock)
{
@@ -216,19 +224,74 @@ afr_locked_nodes_count(unsigned char *locked_nodes, int child_count)
return call_count;
}
-/* FIXME: What if UNLOCK fails */
+static void
+afr_log_locks_failure(call_frame_t *frame, char *where, char *what,
+ int op_errno)
+{
+ xlator_t *this = frame->this;
+ gf_lkowner_t *lk_owner = &frame->root->lk_owner;
+ afr_local_t *local = frame->local;
+ const char *fop = NULL;
+ char *gfid = NULL;
+ const char *name = NULL;
+
+ fop = gf_fop_list[local->op];
+
+ switch (local->transaction.type) {
+ case AFR_ENTRY_RENAME_TRANSACTION:
+ case AFR_ENTRY_TRANSACTION:
+ switch (local->op) {
+ case GF_FOP_LINK:
+ gfid = uuid_utoa(local->newloc.pargfid);
+ name = local->newloc.name;
+ break;
+ default:
+ gfid = uuid_utoa(local->loc.pargfid);
+ name = local->loc.name;
+ break;
+ }
+ gf_msg(this->name, GF_LOG_WARNING, op_errno,
+ AFR_MSG_INTERNAL_LKS_FAILED,
+ "Unable to do entry %s with lk-owner:%s on %s "
+ "while attempting %s on {pgfid:%s, name:%s}.",
+ what, lkowner_utoa(lk_owner), where, fop, gfid, name);
+ break;
+ case AFR_DATA_TRANSACTION:
+ case AFR_METADATA_TRANSACTION:
+ gfid = uuid_utoa(local->inode->gfid);
+ gf_msg(this->name, GF_LOG_WARNING, op_errno,
+ AFR_MSG_INTERNAL_LKS_FAILED,
+ "Unable to do inode %s with lk-owner:%s on %s "
+ "while attempting %s on gfid:%s.",
+ what, lkowner_utoa(lk_owner), where, fop, gfid);
+ break;
+ }
+}
+
static int32_t
afr_unlock_common_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
afr_internal_lock_t *int_lock = NULL;
+ int lockee_num = 0;
int call_count = 0;
+ int child_index = 0;
int ret = 0;
local = frame->local;
int_lock = &local->internal_lock;
+ priv = this->private;
+ lockee_num = (int)((long)cookie) / priv->child_count;
+ child_index = (int)((long)cookie) % priv->child_count;
+
+ if (op_ret < 0 && op_errno != ENOTCONN && op_errno != EBADFD) {
+ afr_log_locks_failure(frame, priv->children[child_index]->name,
+ "unlock", op_errno);
+ }
+ int_lock->lockee[lockee_num].locked_nodes[child_index] &= LOCKED_NO;
if (local->transaction.type == AFR_DATA_TRANSACTION && op_ret != 1)
ret = afr_write_subvol_reset(frame, this);
@@ -239,7 +302,6 @@ afr_unlock_common_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
UNLOCK(&frame->lock);
if (call_count == 0) {
- gf_msg_trace(this->name, 0, "All internal locks unlocked");
int_lock->lock_cbk(frame, this);
}
@@ -247,143 +309,88 @@ afr_unlock_common_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
}
void
-afr_update_uninodelk(afr_local_t *local, afr_internal_lock_t *int_lock,
- int32_t child_index)
-{
- int_lock->locked_nodes[child_index] &= LOCKED_NO;
-}
-
-static int32_t
-afr_unlock_inodelk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
-{
- afr_local_t *local = NULL;
- afr_internal_lock_t *int_lock = NULL;
- int32_t child_index = (long)cookie;
- afr_private_t *priv = NULL;
-
- local = frame->local;
- int_lock = &local->internal_lock;
-
- priv = this->private;
-
- if (op_ret < 0 && op_errno != ENOTCONN && op_errno != EBADFD) {
- gf_msg(this->name, GF_LOG_ERROR, op_errno, AFR_MSG_UNLOCK_FAIL,
- "path=%s gfid=%s: unlock failed on subvolume %s "
- "with lock owner %s",
- local->loc.path, loc_gfid_utoa(&(local->loc)),
- priv->children[child_index]->name,
- lkowner_utoa(&frame->root->lk_owner));
- }
-
- afr_update_uninodelk(local, int_lock, child_index);
-
- afr_unlock_common_cbk(frame, cookie, this, op_ret, op_errno, xdata);
-
- return 0;
-}
-
-static int
-afr_unlock_inodelk(call_frame_t *frame, xlator_t *this)
+afr_internal_lock_wind(call_frame_t *frame,
+ int32_t (*cbk)(call_frame_t *, void *, xlator_t *,
+ int32_t, int32_t, dict_t *),
+ void *cookie, int child, int lockee_num,
+ gf_boolean_t blocking, gf_boolean_t unlock)
{
- afr_internal_lock_t *int_lock = NULL;
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
+ afr_local_t *local = frame->local;
+ xlator_t *this = frame->this;
+ afr_private_t *priv = this->private;
+ afr_internal_lock_t *int_lock = &local->internal_lock;
+ entrylk_cmd cmd = ENTRYLK_LOCK_NB;
+ int32_t cmd1 = F_SETLK;
struct gf_flock flock = {
0,
};
- int call_count = 0;
- int i = 0;
-
- local = frame->local;
- int_lock = &local->internal_lock;
- priv = this->private;
-
- flock.l_start = int_lock->flock.l_start;
- flock.l_len = int_lock->flock.l_len;
- flock.l_type = F_UNLCK;
-
- call_count = afr_locked_nodes_count(int_lock->locked_nodes,
- priv->child_count);
-
- int_lock->lk_call_count = call_count;
-
- if (!call_count) {
- GF_ASSERT(!local->transaction.do_eager_unlock);
- gf_msg_trace(this->name, 0, "No internal locks unlocked");
-
- int_lock->lock_cbk(frame, this);
- goto out;
- }
- for (i = 0; i < priv->child_count; i++) {
- if ((int_lock->locked_nodes[i] & LOCKED_YES) != LOCKED_YES)
- continue;
-
- if (local->fd) {
- STACK_WIND_COOKIE(
- frame, afr_unlock_inodelk_cbk, (void *)(long)i,
- priv->children[i], priv->children[i]->fops->finodelk,
- int_lock->domain, local->fd, F_SETLK, &flock, NULL);
- } else {
- STACK_WIND_COOKIE(
- frame, afr_unlock_inodelk_cbk, (void *)(long)i,
- priv->children[i], priv->children[i]->fops->inodelk,
- int_lock->domain, &local->loc, F_SETLK, &flock, NULL);
- }
+ switch (local->transaction.type) {
+ case AFR_ENTRY_TRANSACTION:
+ case AFR_ENTRY_RENAME_TRANSACTION:
+ if (unlock) {
+ cmd = ENTRYLK_UNLOCK;
+ } else if (blocking) { /*Doesn't make sense to have blocking
+ unlock*/
+ cmd = ENTRYLK_LOCK;
+ }
- if (!--call_count)
+ if (local->fd) {
+ STACK_WIND_COOKIE(frame, cbk, cookie, priv->children[child],
+ priv->children[child]->fops->fentrylk,
+ int_lock->domain,
+ int_lock->lockee[lockee_num].fd,
+ int_lock->lockee[lockee_num].basename, cmd,
+ ENTRYLK_WRLCK, NULL);
+ } else {
+ STACK_WIND_COOKIE(frame, cbk, cookie, priv->children[child],
+ priv->children[child]->fops->entrylk,
+ int_lock->domain,
+ &int_lock->lockee[lockee_num].loc,
+ int_lock->lockee[lockee_num].basename, cmd,
+ ENTRYLK_WRLCK, NULL);
+ }
break;
- }
-out:
- return 0;
-}
-
-static int32_t
-afr_unlock_entrylk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
-{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- afr_internal_lock_t *int_lock = NULL;
- int32_t child_index = 0;
- int lockee_no = 0;
-
- priv = this->private;
- lockee_no = (int)((long)cookie) / priv->child_count;
- child_index = (int)((long)cookie) % priv->child_count;
- local = frame->local;
- int_lock = &local->internal_lock;
+ case AFR_DATA_TRANSACTION:
+ case AFR_METADATA_TRANSACTION:
+ flock = int_lock->lockee[lockee_num].flock;
+ if (unlock) {
+ flock.l_type = F_UNLCK;
+ } else if (blocking) { /*Doesn't make sense to have blocking
+ unlock*/
+ cmd1 = F_SETLKW;
+ }
- if (op_ret < 0) {
- gf_msg(this->name, GF_LOG_ERROR, op_errno, AFR_MSG_ENTRY_UNLOCK_FAIL,
- "%s: unlock failed on %s", local->loc.path,
- priv->children[child_index]->name);
+ if (local->fd) {
+ STACK_WIND_COOKIE(
+ frame, cbk, cookie, priv->children[child],
+ priv->children[child]->fops->finodelk, int_lock->domain,
+ int_lock->lockee[lockee_num].fd, cmd1, &flock, NULL);
+ } else {
+ STACK_WIND_COOKIE(
+ frame, cbk, cookie, priv->children[child],
+ priv->children[child]->fops->inodelk, int_lock->domain,
+ &int_lock->lockee[lockee_num].loc, cmd1, &flock, NULL);
+ }
+ break;
}
-
- int_lock->lockee[lockee_no].locked_nodes[child_index] &= LOCKED_NO;
- afr_unlock_common_cbk(frame, cookie, this, op_ret, op_errno, NULL);
-
- return 0;
}
static int
-afr_unlock_entrylk(call_frame_t *frame, xlator_t *this)
+afr_unlock_now(call_frame_t *frame, xlator_t *this)
{
afr_internal_lock_t *int_lock = NULL;
afr_local_t *local = NULL;
afr_private_t *priv = NULL;
int call_count = 0;
- int index = 0;
- int lockee_no = 0;
- int copies = 0;
+ int child_index = 0;
+ int lockee_num = 0;
int i = -1;
local = frame->local;
int_lock = &local->internal_lock;
priv = this->private;
- copies = priv->child_count;
call_count = afr_lockee_locked_nodes_count(int_lock);
@@ -396,16 +403,13 @@ afr_unlock_entrylk(call_frame_t *frame, xlator_t *this)
}
for (i = 0; i < int_lock->lockee_count * priv->child_count; i++) {
- lockee_no = i / copies;
- index = i % copies;
- if (int_lock->lockee[lockee_no].locked_nodes[index] & LOCKED_YES) {
- STACK_WIND_COOKIE(
- frame, afr_unlock_entrylk_cbk, (void *)(long)i,
- priv->children[index], priv->children[index]->fops->entrylk,
- int_lock->domain, &int_lock->lockee[lockee_no].loc,
- int_lock->lockee[lockee_no].basename, ENTRYLK_UNLOCK,
- ENTRYLK_WRLCK, NULL);
-
+ lockee_num = i / priv->child_count;
+ child_index = i % priv->child_count;
+ if (int_lock->lockee[lockee_num].locked_nodes[child_index] &
+ LOCKED_YES) {
+ afr_internal_lock_wind(frame, afr_unlock_common_cbk,
+ (void *)(long)i, child_index, lockee_num,
+ _gf_false, _gf_true);
if (!--call_count)
break;
}
@@ -415,18 +419,6 @@ out:
return 0;
}
-int32_t
-afr_unlock_now(call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = frame->local;
-
- if (afr_is_inodelk_transaction(local->transaction.type))
- afr_unlock_inodelk(frame, this);
- else
- afr_unlock_entrylk(frame, this);
- return 0;
-}
-
static int32_t
afr_lock_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
int32_t op_errno, dict_t *xdata)
@@ -436,14 +428,14 @@ afr_lock_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
afr_private_t *priv = NULL;
int cky = (long)cookie;
int child_index = 0;
- int lockee_no = 0;
+ int lockee_num = 0;
priv = this->private;
local = frame->local;
int_lock = &local->internal_lock;
child_index = ((int)cky) % priv->child_count;
- lockee_no = ((int)cky) / priv->child_count;
+ lockee_num = ((int)cky) / priv->child_count;
LOCK(&frame->lock);
{
@@ -470,23 +462,16 @@ afr_lock_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
afr_unlock_now(frame, this);
} else {
if (op_ret == 0) {
- if (local->transaction.type == AFR_ENTRY_TRANSACTION ||
- local->transaction.type == AFR_ENTRY_RENAME_TRANSACTION) {
- int_lock->lockee[lockee_no]
- .locked_nodes[child_index] |= LOCKED_YES;
- int_lock->lockee[lockee_no].locked_count++;
- int_lock->entrylk_lock_count++;
- } else {
- int_lock->locked_nodes[child_index] |= LOCKED_YES;
- int_lock->lock_count++;
-
- if (local->transaction.type == AFR_DATA_TRANSACTION) {
- LOCK(&local->inode->lock);
- {
- local->inode_ctx->lock_count++;
- }
- UNLOCK(&local->inode->lock);
+ int_lock->lockee[lockee_num]
+ .locked_nodes[child_index] |= LOCKED_YES;
+ int_lock->lockee[lockee_num].locked_count++;
+ int_lock->lock_count++;
+ if (local->transaction.type == AFR_DATA_TRANSACTION) {
+ LOCK(&local->inode->lock);
+ {
+ local->inode_ctx->lock_count++;
}
+ UNLOCK(&local->inode->lock);
}
}
afr_lock_blocking(frame, this, cky + 1);
@@ -495,30 +480,6 @@ afr_lock_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
return 0;
}
-static int32_t
-afr_blocking_inodelk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
-{
- afr_lock_cbk(frame, cookie, this, op_ret, op_errno, xdata);
- return 0;
-}
-
-static int32_t
-afr_blocking_entrylk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
-{
- afr_lock_cbk(frame, cookie, this, op_ret, op_errno, xdata);
- return 0;
-}
-
-static gf_boolean_t
-afr_is_entrylk(afr_transaction_type trans_type)
-{
- if (afr_is_inodelk_transaction(trans_type))
- return _gf_false;
- return _gf_true;
-}
-
static gf_boolean_t
_is_lock_wind_needed(afr_local_t *local, int child_index)
{
@@ -528,40 +489,12 @@ _is_lock_wind_needed(afr_local_t *local, int child_index)
return _gf_true;
}
-static void
-afr_log_entry_locks_failure(xlator_t *this, afr_local_t *local,
- afr_internal_lock_t *int_lock)
-{
- const char *fop = NULL;
- char *pargfid = NULL;
- const char *name = NULL;
-
- fop = gf_fop_list[local->op];
-
- switch (local->op) {
- case GF_FOP_LINK:
- pargfid = uuid_utoa(local->newloc.pargfid);
- name = local->newloc.name;
- break;
- default:
- pargfid = uuid_utoa(local->loc.pargfid);
- name = local->loc.name;
- break;
- }
-
- gf_msg(this->name, GF_LOG_WARNING, 0, AFR_MSG_BLOCKING_LKS_FAILED,
- "Unable to obtain sufficient blocking entry locks on at least "
- "one child while attempting %s on {pgfid:%s, name:%s}.",
- fop, pargfid, name);
-}
-
static gf_boolean_t
is_blocking_locks_count_sufficient(call_frame_t *frame, xlator_t *this)
{
afr_local_t *local = NULL;
afr_private_t *priv = NULL;
afr_internal_lock_t *int_lock = NULL;
- gf_boolean_t is_entrylk = _gf_false;
int child = 0;
int nlockee = 0;
int lockee_count = 0;
@@ -571,42 +504,26 @@ is_blocking_locks_count_sufficient(call_frame_t *frame, xlator_t *this)
priv = this->private;
int_lock = &local->internal_lock;
lockee_count = int_lock->lockee_count;
- is_entrylk = afr_is_entrylk(local->transaction.type);
-
- if (!is_entrylk) {
- if (int_lock->lock_count == 0) {
- gf_msg(this->name, GF_LOG_WARNING, 0, AFR_MSG_BLOCKING_LKS_FAILED,
- "Unable to obtain "
- "blocking inode lock on even one child for "
- "gfid:%s.",
- uuid_utoa(local->inode->gfid));
- return _gf_false;
- } else {
- /*inodelk succeeded on at least one child. */
- return _gf_true;
- }
- } else {
- if (int_lock->entrylk_lock_count == 0) {
- afr_log_entry_locks_failure(this, local, int_lock);
- return _gf_false;
- }
- /* For FOPS that take multiple sets of locks (mkdir, rename),
- * there must be at least one brick on which the locks from
- * all lock sets were successful. */
- for (child = 0; child < priv->child_count; child++) {
- ret = _gf_true;
- for (nlockee = 0; nlockee < lockee_count; nlockee++) {
- if (!(int_lock->lockee[nlockee].locked_nodes[child] &
- LOCKED_YES))
- ret = _gf_false;
- }
- if (ret)
- return ret;
+ if (int_lock->lock_count == 0) {
+ afr_log_locks_failure(frame, "any subvolume", "lock",
+ int_lock->lock_op_errno);
+ return _gf_false;
+ }
+ /* For FOPS that take multiple sets of locks (mkdir, rename),
+ * there must be at least one brick on which the locks from
+ * all lock sets were successful. */
+ for (child = 0; child < priv->child_count; child++) {
+ ret = _gf_true;
+ for (nlockee = 0; nlockee < lockee_count; nlockee++) {
+ if (!(int_lock->lockee[nlockee].locked_nodes[child] & LOCKED_YES))
+ ret = _gf_false;
}
- if (!ret)
- afr_log_entry_locks_failure(this, local, int_lock);
+ if (ret)
+ return ret;
}
+ if (!ret)
+ afr_log_locks_failure(frame, "all", "lock", int_lock->lock_op_errno);
return ret;
}
@@ -617,27 +534,16 @@ afr_lock_blocking(call_frame_t *frame, xlator_t *this, int cookie)
afr_internal_lock_t *int_lock = NULL;
afr_local_t *local = NULL;
afr_private_t *priv = NULL;
- struct gf_flock flock = {
- 0,
- };
uint64_t ctx = 0;
int ret = 0;
int child_index = 0;
- int lockee_no = 0;
- gf_boolean_t is_entrylk = _gf_false;
+ int lockee_num = 0;
local = frame->local;
int_lock = &local->internal_lock;
priv = this->private;
child_index = cookie % priv->child_count;
- lockee_no = cookie / priv->child_count;
- is_entrylk = afr_is_entrylk(local->transaction.type);
-
- if (!is_entrylk) {
- flock.l_start = int_lock->flock.l_start;
- flock.l_len = int_lock->flock.l_len;
- flock.l_type = int_lock->flock.l_type;
- }
+ lockee_num = cookie / priv->child_count;
if (local->fd) {
ret = fd_ctx_get(local->fd, this, &ctx);
@@ -681,52 +587,8 @@ afr_lock_blocking(call_frame_t *frame, xlator_t *this, int cookie)
return 0;
}
- switch (local->transaction.type) {
- case AFR_DATA_TRANSACTION:
- case AFR_METADATA_TRANSACTION:
-
- if (local->fd) {
- STACK_WIND_COOKIE(
- frame, afr_blocking_inodelk_cbk, (void *)(long)child_index,
- priv->children[child_index],
- priv->children[child_index]->fops->finodelk,
- int_lock->domain, local->fd, F_SETLKW, &flock, NULL);
-
- } else {
- STACK_WIND_COOKIE(
- frame, afr_blocking_inodelk_cbk, (void *)(long)child_index,
- priv->children[child_index],
- priv->children[child_index]->fops->inodelk,
- int_lock->domain, &local->loc, F_SETLKW, &flock, NULL);
- }
-
- break;
-
- case AFR_ENTRY_RENAME_TRANSACTION:
- case AFR_ENTRY_TRANSACTION:
- /*Accounting for child_index increments on 'down'
- *and 'fd-less' children */
-
- if (local->fd) {
- STACK_WIND_COOKIE(frame, afr_blocking_entrylk_cbk,
- (void *)(long)cookie,
- priv->children[child_index],
- priv->children[child_index]->fops->fentrylk,
- int_lock->domain, local->fd,
- int_lock->lockee[lockee_no].basename,
- ENTRYLK_LOCK, ENTRYLK_WRLCK, NULL);
- } else {
- STACK_WIND_COOKIE(
- frame, afr_blocking_entrylk_cbk, (void *)(long)cookie,
- priv->children[child_index],
- priv->children[child_index]->fops->entrylk,
- int_lock->domain, &int_lock->lockee[lockee_no].loc,
- int_lock->lockee[lockee_no].basename, ENTRYLK_LOCK,
- ENTRYLK_WRLCK, NULL);
- }
-
- break;
- }
+ afr_internal_lock_wind(frame, afr_lock_cbk, (void *)(long)cookie,
+ child_index, lockee_num, _gf_true, _gf_false);
return 0;
}
@@ -743,20 +605,10 @@ afr_blocking_lock(call_frame_t *frame, xlator_t *this)
local = frame->local;
int_lock = &local->internal_lock;
- switch (local->transaction.type) {
- case AFR_DATA_TRANSACTION:
- case AFR_METADATA_TRANSACTION:
- initialize_inodelk_variables(frame, this);
- break;
-
- case AFR_ENTRY_RENAME_TRANSACTION:
- case AFR_ENTRY_TRANSACTION:
- up_count = AFR_COUNT(local->child_up, priv->child_count);
- int_lock->lk_call_count = int_lock->lk_expected_count =
- (int_lock->lockee_count * up_count);
- initialize_entrylk_variables(frame, this);
- break;
- }
+ up_count = AFR_COUNT(local->child_up, priv->child_count);
+ int_lock->lk_call_count = int_lock->lk_expected_count =
+ (int_lock->lockee_count * up_count);
+ initialize_internal_lock_variables(frame, this);
afr_lock_blocking(frame, this, 0);
@@ -764,171 +616,20 @@ afr_blocking_lock(call_frame_t *frame, xlator_t *this)
}
static int32_t
-afr_nonblocking_entrylk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
+afr_nb_internal_lock_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
afr_internal_lock_t *int_lock = NULL;
afr_local_t *local = NULL;
int call_count = 0;
- int child_index = (long)cookie;
- int copies = 0;
- int index = 0;
- int lockee_no = 0;
- afr_private_t *priv = NULL;
-
- priv = this->private;
-
- copies = priv->child_count;
- index = child_index % copies;
- lockee_no = child_index / copies;
-
- local = frame->local;
- int_lock = &local->internal_lock;
-
- LOCK(&frame->lock);
- {
- if (op_ret < 0) {
- if (op_errno == ENOSYS) {
- /* return ENOTSUP */
- gf_msg(this->name, GF_LOG_ERROR, ENOSYS,
- AFR_MSG_LOCK_XLATOR_NOT_LOADED,
- "subvolume does not support "
- "locking. please load features/locks"
- " xlator on server");
- local->op_ret = op_ret;
- int_lock->lock_op_ret = op_ret;
-
- int_lock->lock_op_errno = op_errno;
- local->op_errno = op_errno;
- }
- } else if (op_ret == 0) {
- int_lock->lockee[lockee_no].locked_nodes[index] |= LOCKED_YES;
- int_lock->lockee[lockee_no].locked_count++;
- int_lock->entrylk_lock_count++;
- }
-
- call_count = --int_lock->lk_call_count;
- }
- UNLOCK(&frame->lock);
-
- if (call_count == 0) {
- gf_msg_trace(this->name, 0, "Last locking reply received");
- /* all locks successful. Proceed to call FOP */
- if (int_lock->entrylk_lock_count == int_lock->lk_expected_count) {
- gf_msg_trace(this->name, 0, "All servers locked. Calling the cbk");
- int_lock->lock_op_ret = 0;
- int_lock->lock_cbk(frame, this);
- }
- /* Not all locks were successful. Unlock and try locking
- again, this time with serially blocking locks */
- else {
- gf_msg_trace(this->name, 0,
- "%d servers locked. Trying again "
- "with blocking calls",
- int_lock->lock_count);
-
- afr_unlock_now(frame, this);
- }
- }
-
- return 0;
-}
-
-int
-afr_nonblocking_entrylk(call_frame_t *frame, xlator_t *this)
-{
- afr_internal_lock_t *int_lock = NULL;
- afr_local_t *local = NULL;
+ int child_index = 0;
+ int lockee_num = 0;
afr_private_t *priv = NULL;
- afr_fd_ctx_t *fd_ctx = NULL;
- int copies = 0;
- int index = 0;
- int lockee_no = 0;
- int32_t call_count = 0;
- int i = 0;
- local = frame->local;
- int_lock = &local->internal_lock;
priv = this->private;
- copies = priv->child_count;
- initialize_entrylk_variables(frame, this);
-
- if (local->fd) {
- fd_ctx = afr_fd_ctx_get(local->fd, this);
- if (!fd_ctx) {
- gf_msg(this->name, GF_LOG_INFO, 0, AFR_MSG_FD_CTX_GET_FAILED,
- "unable to get fd ctx for fd=%p", local->fd);
-
- local->op_ret = -1;
- int_lock->lock_op_ret = -1;
- local->op_errno = EINVAL;
- int_lock->lock_op_errno = EINVAL;
-
- afr_unlock_now(frame, this);
- return -1;
- }
-
- call_count = int_lock->lockee_count * internal_lock_count(frame, this);
- int_lock->lk_call_count = call_count;
- int_lock->lk_expected_count = call_count;
-
- if (!call_count) {
- gf_msg(this->name, GF_LOG_INFO, 0, AFR_MSG_INFO_COMMON,
- "fd not open on any subvolumes. aborting.");
- afr_unlock_now(frame, this);
- goto out;
- }
-
- /* Send non-blocking entrylk calls only on up children
- and where the fd has been opened */
- for (i = 0; i < int_lock->lockee_count * priv->child_count; i++) {
- index = i % copies;
- lockee_no = i / copies;
- if (local->child_up[index]) {
- STACK_WIND_COOKIE(frame, afr_nonblocking_entrylk_cbk,
- (void *)(long)i, priv->children[index],
- priv->children[index]->fops->fentrylk,
- this->name, local->fd,
- int_lock->lockee[lockee_no].basename,
- ENTRYLK_LOCK_NB, ENTRYLK_WRLCK, NULL);
- if (!--call_count)
- break;
- }
- }
- } else {
- call_count = int_lock->lockee_count * internal_lock_count(frame, this);
- int_lock->lk_call_count = call_count;
- int_lock->lk_expected_count = call_count;
-
- for (i = 0; i < int_lock->lockee_count * priv->child_count; i++) {
- index = i % copies;
- lockee_no = i / copies;
- if (local->child_up[index]) {
- STACK_WIND_COOKIE(frame, afr_nonblocking_entrylk_cbk,
- (void *)(long)i, priv->children[index],
- priv->children[index]->fops->entrylk,
- this->name, &int_lock->lockee[lockee_no].loc,
- int_lock->lockee[lockee_no].basename,
- ENTRYLK_LOCK_NB, ENTRYLK_WRLCK, NULL);
-
- if (!--call_count)
- break;
- }
- }
- }
-out:
- return 0;
-}
-
-int32_t
-afr_nonblocking_inodelk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
-{
- afr_internal_lock_t *int_lock = NULL;
- afr_local_t *local = NULL;
- int call_count = 0;
- int child_index = (long)cookie;
+ child_index = ((long)cookie) % priv->child_count;
+ lockee_num = ((long)cookie) / priv->child_count;
local = frame->local;
int_lock = &local->internal_lock;
@@ -953,11 +654,14 @@ afr_nonblocking_inodelk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
" xlator on server");
local->op_ret = op_ret;
int_lock->lock_op_ret = op_ret;
+
int_lock->lock_op_errno = op_errno;
local->op_errno = op_errno;
}
- } else {
- int_lock->locked_nodes[child_index] |= LOCKED_YES;
+ } else if (op_ret == 0) {
+ int_lock->lockee[lockee_num]
+ .locked_nodes[child_index] |= LOCKED_YES;
+ int_lock->lockee[lockee_num].locked_count++;
int_lock->lock_count++;
}
@@ -966,7 +670,7 @@ afr_nonblocking_inodelk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
UNLOCK(&frame->lock);
if (call_count == 0) {
- gf_msg_trace(this->name, 0, "Last inode locking reply received");
+ gf_msg_trace(this->name, 0, "Last locking reply received");
/* all locks successful. Proceed to call FOP */
if (int_lock->lock_count == int_lock->lk_expected_count) {
gf_msg_trace(this->name, 0, "All servers locked. Calling the cbk");
@@ -977,8 +681,8 @@ afr_nonblocking_inodelk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
again, this time with serially blocking locks */
else {
gf_msg_trace(this->name, 0,
- "%d servers locked. "
- "Trying again with blocking calls",
+ "%d servers locked. Trying again "
+ "with blocking calls",
int_lock->lock_count);
afr_unlock_now(frame, this);
@@ -989,12 +693,14 @@ afr_nonblocking_inodelk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
}
int
-afr_nonblocking_inodelk(call_frame_t *frame, xlator_t *this)
+afr_lock_nonblocking(call_frame_t *frame, xlator_t *this)
{
afr_internal_lock_t *int_lock = NULL;
afr_local_t *local = NULL;
afr_private_t *priv = NULL;
afr_fd_ctx_t *fd_ctx = NULL;
+ int child = 0;
+ int lockee_num = 0;
int32_t call_count = 0;
int i = 0;
int ret = 0;
@@ -1003,7 +709,7 @@ afr_nonblocking_inodelk(call_frame_t *frame, xlator_t *this)
int_lock = &local->internal_lock;
priv = this->private;
- initialize_inodelk_variables(frame, this);
+ initialize_internal_lock_variables(frame, this);
if (local->fd) {
fd_ctx = afr_fd_ctx_get(local->fd, this);
@@ -1022,36 +728,29 @@ afr_nonblocking_inodelk(call_frame_t *frame, xlator_t *this)
}
}
- call_count = internal_lock_count(frame, this);
+ call_count = int_lock->lockee_count * internal_lock_count(frame, this);
int_lock->lk_call_count = call_count;
int_lock->lk_expected_count = call_count;
if (!call_count) {
- gf_msg(this->name, GF_LOG_INFO, 0, AFR_MSG_SUBVOLS_DOWN,
- "All bricks are down, aborting.");
+ gf_msg(this->name, GF_LOG_INFO, 0, AFR_MSG_INFO_COMMON,
+ "fd not open on any subvolumes. aborting.");
afr_unlock_now(frame, this);
goto out;
}
- /* Send non-blocking inodelk calls only on up children
+ /* Send non-blocking lock calls only on up children
and where the fd has been opened */
- for (i = 0; i < priv->child_count; i++) {
- if (!local->child_up[i])
- continue;
-
- if (local->fd) {
- STACK_WIND_COOKIE(
- frame, afr_nonblocking_inodelk_cbk, (void *)(long)i,
- priv->children[i], priv->children[i]->fops->finodelk,
- int_lock->domain, local->fd, F_SETLK, &int_lock->flock, NULL);
- } else {
- STACK_WIND_COOKIE(
- frame, afr_nonblocking_inodelk_cbk, (void *)(long)i,
- priv->children[i], priv->children[i]->fops->inodelk,
- int_lock->domain, &local->loc, F_SETLK, &int_lock->flock, NULL);
+ for (i = 0; i < int_lock->lockee_count * priv->child_count; i++) {
+ child = i % priv->child_count;
+ lockee_num = i / priv->child_count;
+ if (local->child_up[child]) {
+ afr_internal_lock_wind(frame, afr_nb_internal_lock_cbk,
+ (void *)(long)i, child, lockee_num,
+ _gf_false, _gf_false);
+ if (!--call_count)
+ break;
}
- if (!--call_count)
- break;
}
out:
return ret;
diff --git a/xlators/cluster/afr/src/afr-mem-types.h b/xlators/cluster/afr/src/afr-mem-types.h
index d62f7876bcd..816065fb57a 100644
--- a/xlators/cluster/afr/src/afr-mem-types.h
+++ b/xlators/cluster/afr/src/afr-mem-types.h
@@ -11,35 +11,19 @@
#ifndef __AFR_MEM_TYPES_H__
#define __AFR_MEM_TYPES_H__
-#include "mem-types.h"
+#include <glusterfs/mem-types.h>
enum gf_afr_mem_types_ {
- gf_afr_mt_iovec = gf_common_mt_end + 1,
- gf_afr_mt_afr_fd_ctx_t,
+ gf_afr_mt_afr_fd_ctx_t = gf_common_mt_end + 1,
gf_afr_mt_afr_private_t,
gf_afr_mt_int32_t,
gf_afr_mt_char,
gf_afr_mt_xattr_key,
gf_afr_mt_dict_t,
gf_afr_mt_xlator_t,
- gf_afr_mt_iatt,
- gf_afr_mt_int,
gf_afr_mt_afr_node_character,
- gf_afr_mt_sh_diff_loop_state,
- gf_afr_mt_uint8_t,
- gf_afr_mt_loc_t,
- gf_afr_mt_entry_name,
- gf_afr_mt_pump_priv,
- gf_afr_mt_locked_fd,
gf_afr_mt_inode_ctx_t,
- gf_afr_fd_paused_call_t,
- gf_afr_mt_crawl_data_t,
- gf_afr_mt_brick_pos_t,
- gf_afr_mt_shd_bool_t,
- gf_afr_mt_shd_timer_t,
gf_afr_mt_shd_event_t,
- gf_afr_mt_time_t,
- gf_afr_mt_pos_data_t,
gf_afr_mt_reply_t,
gf_afr_mt_subvol_healer_t,
gf_afr_mt_spbc_timeout_t,
@@ -47,6 +31,8 @@ enum gf_afr_mem_types_ {
gf_afr_mt_empty_brick_t,
gf_afr_mt_child_latency_t,
gf_afr_mt_atomic_t,
+ gf_afr_mt_lk_heal_info_t,
+ gf_afr_mt_gf_lock,
gf_afr_mt_end
};
#endif
diff --git a/xlators/cluster/afr/src/afr-messages.h b/xlators/cluster/afr/src/afr-messages.h
index 696336889d3..e73fd997765 100644
--- a/xlators/cluster/afr/src/afr-messages.h
+++ b/xlators/cluster/afr/src/afr-messages.h
@@ -11,7 +11,7 @@
#ifndef _AFR_MESSAGES_H_
#define _AFR_MESSAGES_H_
-#include "glfs-message-id.h"
+#include <glusterfs/glfs-message-id.h>
/* To add new message IDs, append new identifiers at the end of the list.
*
@@ -23,25 +23,145 @@
* glfs-message-id.h.
*/
-GLFS_MSGID(AFR, AFR_MSG_QUORUM_FAIL, AFR_MSG_QUORUM_MET,
- AFR_MSG_QUORUM_OVERRIDE, AFR_MSG_INVALID_CHILD_UP, AFR_MSG_SUBVOL_UP,
- AFR_MSG_SUBVOLS_DOWN, AFR_MSG_ENTRY_UNLOCK_FAIL, AFR_MSG_SPLIT_BRAIN,
- AFR_MSG_OPEN_FAIL, AFR_MSG_UNLOCK_FAIL, AFR_MSG_REPLACE_BRICK_STATUS,
- AFR_MSG_GFID_NULL, AFR_MSG_FD_CREATE_FAILED, AFR_MSG_DICT_SET_FAILED,
- AFR_MSG_EXPUNGING_FILE_OR_DIR, AFR_MSG_MIGRATION_IN_PROGRESS,
- AFR_MSG_CHILD_MISCONFIGURED, AFR_MSG_VOL_MISCONFIGURED,
- AFR_MSG_BLOCKING_LKS_FAILED, AFR_MSG_INVALID_FD, AFR_MSG_LOCK_INFO,
- AFR_MSG_LOCK_XLATOR_NOT_LOADED, AFR_MSG_FD_CTX_GET_FAILED,
- AFR_MSG_INVALID_SUBVOL, AFR_MSG_PUMP_XLATOR_ERROR,
- AFR_MSG_SELF_HEAL_INFO, AFR_MSG_READ_SUBVOL_ERROR,
- AFR_MSG_DICT_GET_FAILED, AFR_MSG_INFO_COMMON,
- AFR_MSG_SPLIT_BRAIN_CHOICE_ERROR, AFR_MSG_LOCAL_CHILD,
- AFR_MSG_INVALID_DATA, AFR_MSG_INVALID_ARG,
- AFR_MSG_INDEX_DIR_GET_FAILED, AFR_MSG_FSYNC_FAILED,
- AFR_MSG_FAVORITE_CHILD, AFR_MSG_SELF_HEAL_FAILED,
- AFR_MSG_SPLIT_BRAIN_STATUS, AFR_MSG_ADD_BRICK_STATUS,
- AFR_MSG_NO_CHANGELOG, AFR_MSG_TIMER_CREATE_FAIL,
- AFR_MSG_SBRAIN_FAV_CHILD_POLICY, AFR_MSG_INODE_CTX_GET_FAILED,
- AFR_MSG_THIN_ARB);
+GLFS_MSGID(
+ AFR, AFR_MSG_QUORUM_FAIL, AFR_MSG_QUORUM_MET, AFR_MSG_QUORUM_OVERRIDE,
+ AFR_MSG_INVALID_CHILD_UP, AFR_MSG_SUBVOL_UP, AFR_MSG_SUBVOLS_DOWN,
+ AFR_MSG_ENTRY_UNLOCK_FAIL, AFR_MSG_SPLIT_BRAIN, AFR_MSG_OPEN_FAIL,
+ AFR_MSG_UNLOCK_FAIL, AFR_MSG_REPLACE_BRICK_STATUS, AFR_MSG_GFID_NULL,
+ AFR_MSG_FD_CREATE_FAILED, AFR_MSG_DICT_SET_FAILED,
+ AFR_MSG_EXPUNGING_FILE_OR_DIR, AFR_MSG_MIGRATION_IN_PROGRESS,
+ AFR_MSG_CHILD_MISCONFIGURED, AFR_MSG_VOL_MISCONFIGURED,
+ AFR_MSG_INTERNAL_LKS_FAILED, AFR_MSG_INVALID_FD, AFR_MSG_LOCK_INFO,
+ AFR_MSG_LOCK_XLATOR_NOT_LOADED, AFR_MSG_FD_CTX_GET_FAILED,
+ AFR_MSG_INVALID_SUBVOL, AFR_MSG_PUMP_XLATOR_ERROR, AFR_MSG_SELF_HEAL_INFO,
+ AFR_MSG_READ_SUBVOL_ERROR, AFR_MSG_DICT_GET_FAILED, AFR_MSG_INFO_COMMON,
+ AFR_MSG_SPLIT_BRAIN_CHOICE_ERROR, AFR_MSG_LOCAL_CHILD, AFR_MSG_INVALID_DATA,
+ AFR_MSG_INVALID_ARG, AFR_MSG_INDEX_DIR_GET_FAILED, AFR_MSG_FSYNC_FAILED,
+ AFR_MSG_FAVORITE_CHILD, AFR_MSG_SELF_HEAL_FAILED,
+ AFR_MSG_SPLIT_BRAIN_STATUS, AFR_MSG_ADD_BRICK_STATUS, AFR_MSG_NO_CHANGELOG,
+ AFR_MSG_TIMER_CREATE_FAIL, AFR_MSG_SBRAIN_FAV_CHILD_POLICY,
+ AFR_MSG_INODE_CTX_GET_FAILED, AFR_MSG_THIN_ARB,
+ AFR_MSG_THIN_ARB_XATTROP_FAILED, AFR_MSG_THIN_ARB_LOC_POP_FAILED,
+ AFR_MSG_GET_PEND_VAL, AFR_MSG_THIN_ARB_SKIP_SHD, AFR_MSG_UNKNOWN_SET,
+ AFR_MSG_NO_XL_ID, AFR_MSG_SELF_HEAL_INFO_START,
+ AFR_MSG_SELF_HEAL_INFO_FINISH, AFR_MSG_INCRE_COUNT,
+ AFR_MSG_ADD_TO_OUTPUT_FAILED, AFR_MSG_SET_TIME_FAILED,
+ AFR_MSG_GFID_MISMATCH_DETECTED, AFR_MSG_GFID_HEAL_MSG,
+ AFR_MSG_THIN_ARB_LOOKUP_FAILED, AFR_MSG_DICT_CREATE_FAILED,
+ AFR_MSG_NO_MAJORITY_TO_RESOLVE, AFR_MSG_TYPE_MISMATCH,
+ AFR_MSG_SIZE_POLICY_NOT_APPLICABLE, AFR_MSG_NO_CHILD_SELECTED,
+ AFR_MSG_INVALID_CHILD, AFR_MSG_RESOLVE_CONFLICTING_DATA,
+ SERROR_GETTING_SRC_BRICK, SNO_DIFF_IN_MTIME, SNO_BIGGER_FILE,
+ SALL_BRICKS_UP_TO_RESOLVE, AFR_MSG_UNLOCK_FAILED, AFR_MSG_POST_OP_FAILED,
+ AFR_MSG_TA_FRAME_CREATE_FAILED, AFR_MSG_SET_KEY_XATTROP_FAILED,
+ AFR_MSG_BLOCKING_ENTRYLKS_FAILED, AFR_MSG_FOP_FAILED,
+ AFR_MSG_CLEAN_UP_FAILED, AFR_MSG_UNABLE_TO_FETCH, AFR_MSG_XATTR_SET_FAILED,
+ AFR_MSG_SPLIT_BRAIN_REPLICA, AFR_MSG_INODE_CTX_FAILED,
+ AFR_MSG_LOOKUP_FAILED, AFR_MSG_ALL_SUBVOLS_DOWN,
+ AFR_MSG_RELEASE_LOCK_FAILED, AFR_MSG_CLEAR_TIME_SPLIT_BRAIN,
+ AFR_MSG_READ_FAILED, AFR_MSG_LAUNCH_FAILED, AFR_MSG_READ_SUBVOL_NOT_UP,
+ AFR_MSG_LK_HEAL_DOM, AFR_MSG_NEW_BRICK, AFR_MSG_SPLIT_BRAIN_SET_FAILED,
+ AFR_MSG_SPLIT_BRAIN_DETERMINE_FAILED, AFR_MSG_HEALER_SPAWN_FAILED,
+ AFR_MSG_ADD_CRAWL_EVENT_FAILED, AFR_MSG_NULL_DEREF, AFR_MSG_SET_PEND_XATTR,
+ AFR_MSG_INTERNAL_ATTR);
+#define AFR_MSG_DICT_GET_FAILED_STR "Dict get failed"
+#define AFR_MSG_DICT_SET_FAILED_STR "Dict set failed"
+#define AFR_MSG_HEALER_SPAWN_FAILED_STR "Healer spawn failed"
+#define AFR_MSG_ADD_CRAWL_EVENT_FAILED_STR "Adding crawl event failed"
+#define AFR_MSG_INVALID_ARG_STR "Invalid argument"
+#define AFR_MSG_INDEX_DIR_GET_FAILED_STR "unable to get index-dir on "
+#define AFR_MSG_THIN_ARB_LOOKUP_FAILED_STR "Failed lookup on file"
+#define AFR_MSG_DICT_CREATE_FAILED_STR "Failed to create dict."
+#define AFR_MSG_THIN_ARB_XATTROP_FAILED_STR "Xattrop failed."
+#define AFR_MSG_THIN_ARB_LOC_POP_FAILED_STR \
+ "Failed to populate loc for thin-arbiter"
+#define AFR_MSG_GET_PEND_VAL_STR "Error getting value of pending"
+#define AFR_MSG_THIN_ARB_SKIP_SHD_STR "I am not the god shd. skipping."
+#define AFR_MSG_UNKNOWN_SET_STR "Unknown set"
+#define AFR_MSG_NO_XL_ID_STR "xl does not have id"
+#define AFR_MSG_SELF_HEAL_INFO_START_STR "starting full sweep on"
+#define AFR_MSG_SELF_HEAL_INFO_FINISH_STR "finished full sweep on"
+#define AFR_MSG_INCRE_COUNT_STR "Could not increment the counter."
+#define AFR_MSG_ADD_TO_OUTPUT_FAILED_STR "Could not add to output"
+#define AFR_MSG_SET_TIME_FAILED_STR "Could not set time"
+#define AFR_MSG_GFID_HEAL_MSG_STR "Error setting gfid-heal-msg dict"
+#define AFR_MSG_NO_MAJORITY_TO_RESOLVE_STR \
+ "No majority to resolve gfid split brain"
+#define AFR_MSG_GFID_MISMATCH_DETECTED_STR "Gfid mismatch dectected"
+#define AFR_MSG_SELF_HEAL_INFO_STR "performing selfheal"
+#define AFR_MSG_TYPE_MISMATCH_STR "TYPE mismatch"
+#define AFR_MSG_SIZE_POLICY_NOT_APPLICABLE_STR \
+ "Size policy is not applicable to directories."
+#define AFR_MSG_NO_CHILD_SELECTED_STR \
+ "No child selected by favorite-child policy"
+#define AFR_MSG_INVALID_CHILD_STR "Invalid child"
+#define AFR_MSG_RESOLVE_CONFLICTING_DATA_STR \
+ "selected as authentic to resolve conflicting data"
+#define SERROR_GETTING_SRC_BRICK_STR "Error getting the source brick"
+#define SNO_DIFF_IN_MTIME_STR "No difference in mtime"
+#define SNO_BIGGER_FILE_STR "No bigger file"
+#define SALL_BRICKS_UP_TO_RESOLVE_STR \
+ "All the bricks should be up to resolve the gfid split brain"
+#define AFR_MSG_UNLOCK_FAILED_STR "Failed to unlock"
+#define AFR_MSG_POST_OP_FAILED_STR "Post-op on thin-arbiter failed"
+#define AFR_MSG_TA_FRAME_CREATE_FAILED_STR "Failed to create ta_frame"
+#define AFR_MSG_SET_KEY_XATTROP_FAILED_STR "Could not set key during xattrop"
+#define AFR_MSG_BLOCKING_ENTRYLKS_FAILED_STR "Blocking entrylks failed"
+#define AFR_MSG_FSYNC_FAILED_STR "fsync failed"
+#define AFR_MSG_QUORUM_FAIL_STR "quorum is not met"
+#define AFR_MSG_FOP_FAILED_STR "Failing Fop"
+#define AFR_MSG_INVALID_SUBVOL_STR "not a subvolume"
+#define AFR_MSG_VOL_MISCONFIGURED_STR "Volume is dangling"
+#define AFR_MSG_CHILD_MISCONFIGURED_STR \
+ "replicate translator needs more than one subvolume defined"
+#define AFR_MSG_CLEAN_UP_FAILED_STR "Failed to clean up healer threads"
+#define AFR_MSG_QUORUM_OVERRIDE_STR "overriding quorum-count"
+#define AFR_MSG_UNABLE_TO_FETCH_STR \
+ "Unable to fetch afr-pending-xattr option from volfile. Falling back to " \
+ "using client translator names"
+#define AFR_MSG_NULL_DEREF_STR "possible NULL deref"
+#define AFR_MSG_XATTR_SET_FAILED_STR "Cannot set xattr cookie key"
+#define AFR_MSG_SPLIT_BRAIN_STATUS_STR "Failed to create synctask"
+#define AFR_MSG_SUBVOLS_DOWN_STR "All subvolumes are not up"
+#define AFR_MSG_SPLIT_BRAIN_CHOICE_ERROR_STR \
+ "Failed to cancel split-brain choice"
+#define AFR_MSG_SPLIT_BRAIN_REPLICA_STR \
+ "Cannot set replica. File is not in data/metadata split-brain"
+#define AFR_MSG_INODE_CTX_FAILED_STR "Failed to get inode_ctx"
+#define AFR_MSG_READ_SUBVOL_ERROR_STR "no read subvols"
+#define AFR_MSG_LOCAL_CHILD_STR "selecting local read-child"
+#define AFR_MSG_LOOKUP_FAILED_STR "Failed to lookup/create thin-arbiter id file"
+#define AFR_MSG_TIMER_CREATE_FAIL_STR \
+ "Cannot create timer for delayed initialization"
+#define AFR_MSG_SUBVOL_UP_STR "Subvolume came back up; going online"
+#define AFR_MSG_ALL_SUBVOLS_DOWN_STR \
+ "All subvolumes are down. Going offline until atleast one of them is up"
+#define AFR_MSG_RELEASE_LOCK_FAILED_STR "Failed to release lock"
+#define AFR_MSG_INVALID_CHILD_UP_STR "Received child_up from invalid subvolume"
+#define AFR_MSG_QUORUM_MET_STR "Client-quorum is met"
+#define AFR_MSG_EXPUNGING_FILE_OR_DIR_STR "expunging file or dir"
+#define AFR_MSG_SELF_HEAL_FAILED_STR "Invalid"
+#define AFR_MSG_SPLIT_BRAIN_STR "Skipping conservative mergeon the file"
+#define AFR_MSG_CLEAR_TIME_SPLIT_BRAIN_STR "clear time split brain"
+#define AFR_MSG_READ_FAILED_STR "Failing read since good brick is down"
+#define AFR_MSG_LAUNCH_FAILED_STR "Failed to launch synctask"
+#define AFR_MSG_READ_SUBVOL_NOT_UP_STR \
+ "read subvolume in this generation is not up"
+#define AFR_MSG_INTERNAL_LKS_FAILED_STR \
+ "Unable to work with lk-owner while attempting fop"
+#define AFR_MSG_LOCK_XLATOR_NOT_LOADED_STR \
+ "subvolume does not support locking. please load features/locks xlator " \
+ "on server."
+#define AFR_MSG_FD_CTX_GET_FAILED_STR "unable to get fd ctx"
+#define AFR_MSG_INFO_COMMON_STR "fd not open on any subvolumes, aborting."
+#define AFR_MSG_REPLACE_BRICK_STATUS_STR "Couldn't acquire lock on any child."
+#define AFR_MSG_NEW_BRICK_STR "New brick"
+#define AFR_MSG_SPLIT_BRAIN_SET_FAILED_STR \
+ "Failed to set split-brain choice to -1"
+#define AFR_MSG_SPLIT_BRAIN_DETERMINE_FAILED_STR \
+ "Failed to determine split-brain. Aborting split-brain-choice set"
+#define AFR_MSG_OPEN_FAIL_STR "Failed to open subvolume"
+#define AFR_MSG_SET_PEND_XATTR_STR "Set of pending xattr"
+#define AFR_MSG_INTERNAL_ATTR_STR "is an internal extended attribute"
#endif /* !_AFR_MESSAGES_H_ */
diff --git a/xlators/cluster/afr/src/afr-open.c b/xlators/cluster/afr/src/afr-open.c
index 1763dc0a4cc..64856042b65 100644
--- a/xlators/cluster/afr/src/afr-open.c
+++ b/xlators/cluster/afr/src/afr-open.c
@@ -8,32 +8,22 @@
cases as published by the Free Software Foundation.
*/
-#include <libgen.h>
#include <unistd.h>
-#include <fnmatch.h>
#include <sys/time.h>
#include <stdlib.h>
#include <signal.h>
-#include "glusterfs.h"
+#include <glusterfs/glusterfs.h>
#include "afr.h"
-#include "dict.h"
-#include "xlator.h"
-#include "hashfn.h"
-#include "logging.h"
-#include "list.h"
-#include "call-stub.h"
-#include "defaults.h"
-#include "common-utils.h"
-#include "compat-errno.h"
-#include "compat.h"
-#include "byte-order.h"
-#include "statedump.h"
-
-#include "afr-inode-read.h"
-#include "afr-inode-write.h"
-#include "afr-dir-read.h"
-#include "afr-dir-write.h"
+#include <glusterfs/dict.h>
+#include <glusterfs/logging.h>
+#include <glusterfs/defaults.h>
+#include <glusterfs/common-utils.h>
+#include <glusterfs/compat-errno.h>
+#include <glusterfs/compat.h>
+#include <glusterfs/byte-order.h>
+#include <glusterfs/statedump.h>
+
#include "afr-transaction.h"
gf_boolean_t
@@ -73,6 +63,10 @@ afr_open_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
local = frame->local;
fd_ctx = local->fd_ctx;
+ local->replies[child_index].valid = 1;
+ local->replies[child_index].op_ret = op_ret;
+ local->replies[child_index].op_errno = op_errno;
+
LOCK(&frame->lock);
{
if (op_ret == -1) {
@@ -84,13 +78,16 @@ afr_open_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
if (!local->xdata_rsp && xdata)
local->xdata_rsp = dict_ref(xdata);
}
+ call_count = --local->call_count;
}
UNLOCK(&frame->lock);
- call_count = afr_frame_return(frame);
-
if (call_count == 0) {
- if ((fd_ctx->flags & O_TRUNC) && (local->op_ret >= 0)) {
+ afr_handle_replies_quorum(frame, this);
+ if (local->op_ret == -1) {
+ AFR_STACK_UNWIND(open, frame, local->op_ret, local->op_errno, NULL,
+ NULL);
+ } else if (fd_ctx->flags & O_TRUNC) {
STACK_WIND(frame, afr_open_ftruncate_cbk, this,
this->fops->ftruncate, fd, 0, NULL);
} else {
@@ -140,7 +137,7 @@ afr_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
{
afr_private_t *priv = NULL;
afr_local_t *local = NULL;
- int spb_choice = 0;
+ int spb_subvol = 0;
int event_generation = 0;
int ret = 0;
int32_t op_errno = 0;
@@ -161,6 +158,11 @@ afr_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
goto out;
}
+ if (priv->quorum_count && !afr_has_quorum(local->child_up, this, NULL)) {
+ op_errno = afr_quorum_errno(priv);
+ goto out;
+ }
+
if (!afr_is_consistent_io_possible(local, priv, &op_errno))
goto out;
@@ -177,9 +179,9 @@ afr_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
ret = afr_inode_get_readable(frame, local->inode, this, NULL,
&event_generation, AFR_DATA_TRANSACTION);
if ((ret < 0) &&
- (afr_inode_split_brain_choice_get(local->inode, this, &spb_choice) ==
- 0) &&
- spb_choice < 0) {
+ (afr_split_brain_read_subvol_get(local->inode, this, NULL,
+ &spb_subvol) == 0) &&
+ spb_subvol < 0) {
afr_inode_refresh(frame, this, local->inode, local->inode->gfid,
afr_open_continue);
} else {
@@ -213,11 +215,9 @@ afr_openfd_fix_open_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
"successfully on subvolume %s",
local->loc.path, priv->children[child_index]->name);
} else {
- gf_msg(this->name, fop_log_level(GF_FOP_OPEN, op_errno), op_errno,
- AFR_MSG_OPEN_FAIL,
- "Failed to open %s on "
- "subvolume %s",
- local->loc.path, priv->children[child_index]->name);
+ gf_smsg(this->name, fop_log_level(GF_FOP_OPEN, op_errno), op_errno,
+ AFR_MSG_OPEN_FAIL, "path=%s", local->loc.path, "subvolume=%s",
+ priv->children[child_index]->name, NULL);
}
fd_ctx = local->fd_ctx;
diff --git a/xlators/cluster/afr/src/afr-read-txn.c b/xlators/cluster/afr/src/afr-read-txn.c
index 1cd5c2eee3b..6fc2c75145c 100644
--- a/xlators/cluster/afr/src/afr-read-txn.c
+++ b/xlators/cluster/afr/src/afr-read-txn.c
@@ -30,27 +30,6 @@ afr_pending_read_decrement(afr_private_t *priv, int child_index)
GF_ATOMIC_DEC(priv->pending_reads[child_index]);
}
-static gf_boolean_t
-afr_ta_dict_contains_pending_xattr(dict_t *dict, afr_private_t *priv, int child)
-{
- int *pending = NULL;
- int ret = 0;
- int i = 0;
-
- ret = dict_get_ptr(dict, priv->pending_key[child], (void *)&pending);
- if (ret == 0) {
- for (i = 0; i < AFR_NUM_CHANGE_LOGS; i++) {
- /* Not doing a ntoh32(pending) as we just want to check
- * if it is non-zero or not. */
- if (pending[i]) {
- return _gf_true;
- }
- }
- }
-
- return _gf_false;
-}
-
void
afr_read_txn_wind(call_frame_t *frame, xlator_t *this, int subvol)
{
@@ -114,7 +93,7 @@ afr_ta_read_txn(void *opaque)
call_frame_t *frame = NULL;
xlator_t *this = NULL;
int read_subvol = -1;
- int up_child = AFR_CHILD_UNKNOWN;
+ int query_child = AFR_CHILD_UNKNOWN;
int possible_bad_child = AFR_CHILD_UNKNOWN;
int ret = 0;
int op_errno = ENOMEM;
@@ -134,18 +113,18 @@ afr_ta_read_txn(void *opaque)
this = frame->this;
local = frame->local;
priv = this->private;
+ query_child = local->read_txn_query_child;
- if (local->child_up[AFR_CHILD_ZERO]) {
- up_child = AFR_CHILD_ZERO;
+ if (query_child == AFR_CHILD_ZERO) {
possible_bad_child = AFR_CHILD_ONE;
- } else if (local->child_up[AFR_CHILD_ONE]) {
- up_child = AFR_CHILD_ONE;
+ } else if (query_child == AFR_CHILD_ONE) {
possible_bad_child = AFR_CHILD_ZERO;
+ } else {
+ /*read_txn_query_child is AFR_CHILD_UNKNOWN*/
+ goto out;
}
- GF_ASSERT(up_child != AFR_CHILD_UNKNOWN);
-
- /* Query the up_child to see if it blames the down one. */
+ /* Ask the query_child to see if it blames the possibly bad one. */
xdata_req = dict_new();
if (!xdata_req)
goto out;
@@ -159,30 +138,33 @@ afr_ta_read_txn(void *opaque)
goto out;
if (local->fd) {
- ret = syncop_fxattrop(priv->children[up_child], local->fd,
+ ret = syncop_fxattrop(priv->children[query_child], local->fd,
GF_XATTROP_ADD_ARRAY, xdata_req, NULL, &xdata_rsp,
NULL);
} else {
- ret = syncop_xattrop(priv->children[up_child], &local->loc,
+ ret = syncop_xattrop(priv->children[query_child], &local->loc,
GF_XATTROP_ADD_ARRAY, xdata_req, NULL, &xdata_rsp,
NULL);
}
if (ret || !xdata_rsp) {
gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_THIN_ARB,
"Failed xattrop for gfid %s on %s",
- uuid_utoa(local->inode->gfid), priv->children[up_child]->name);
+ uuid_utoa(local->inode->gfid),
+ priv->children[query_child]->name);
op_errno = -ret;
goto out;
}
if (afr_ta_dict_contains_pending_xattr(xdata_rsp, priv,
possible_bad_child)) {
- read_subvol = up_child;
+ read_subvol = query_child;
goto out;
}
dict_unref(xdata_rsp);
- /* Query thin-arbiter to see if it blames any data brick. */
- ret = afr_fill_ta_loc(this, &loc);
+ xdata_rsp = NULL;
+
+ /* It doesn't. So query thin-arbiter to see if it blames any data brick. */
+ ret = afr_fill_ta_loc(this, &loc, _gf_true);
if (ret) {
gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_THIN_ARB,
"Failed to populate thin-arbiter loc for: %s.", loc.name);
@@ -211,8 +193,8 @@ afr_ta_read_txn(void *opaque)
goto unlock;
}
- if (!afr_ta_dict_contains_pending_xattr(xdata_rsp, priv, up_child)) {
- read_subvol = up_child;
+ if (!afr_ta_dict_contains_pending_xattr(xdata_rsp, priv, query_child)) {
+ read_subvol = query_child;
} else {
gf_msg(this->name, GF_LOG_ERROR, EIO, AFR_MSG_THIN_ARB,
"Failing read for gfid %s since good brick %s is down",
@@ -290,7 +272,7 @@ afr_read_txn_refresh_done(call_frame_t *frame, xlator_t *this, int err)
int read_subvol = -1;
inode_t *inode = NULL;
int ret = -1;
- int spb_choice = -1;
+ int spb_subvol = -1;
local = frame->local;
inode = local->inode;
@@ -321,9 +303,9 @@ afr_read_txn_refresh_done(call_frame_t *frame, xlator_t *this, int err)
local->read_attempted[read_subvol] = 1;
readfn:
if (read_subvol == -1) {
- ret = afr_inode_split_brain_choice_get(inode, this, &spb_choice);
- if ((ret == 0) && spb_choice >= 0)
- read_subvol = spb_choice;
+ ret = afr_split_brain_read_subvol_get(inode, this, frame, &spb_subvol);
+ if ((ret == 0) && spb_subvol >= 0)
+ read_subvol = spb_subvol;
}
if (read_subvol == -1) {
@@ -431,7 +413,7 @@ afr_read_txn(call_frame_t *frame, xlator_t *this, inode_t *inode,
local->is_read_txn = _gf_true;
local->transaction.type = type;
- if (priv->quorum_count && !afr_has_quorum(local->child_up, this)) {
+ if (priv->quorum_count && !afr_has_quorum(local->child_up, this, NULL)) {
local->op_ret = -1;
local->op_errno = afr_quorum_errno(priv);
goto read;
@@ -450,6 +432,11 @@ afr_read_txn(call_frame_t *frame, xlator_t *this, inode_t *inode,
if (priv->thin_arbiter_count &&
AFR_COUNT(local->child_up, priv->child_count) != priv->child_count) {
+ if (local->child_up[0]) {
+ local->read_txn_query_child = AFR_CHILD_ZERO;
+ } else if (local->child_up[1]) {
+ local->read_txn_query_child = AFR_CHILD_ONE;
+ }
afr_ta_read_txn_synctask(frame, this);
return 0;
}
diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c
index 8635b3e9e06..a580a1584cc 100644
--- a/xlators/cluster/afr/src/afr-self-heal-common.c
+++ b/xlators/cluster/afr/src/afr-self-heal-common.c
@@ -10,10 +10,10 @@
#include "afr.h"
#include "afr-self-heal.h"
-#include "byte-order.h"
+#include <glusterfs/byte-order.h>
#include "protocol-common.h"
#include "afr-messages.h"
-#include "events.h"
+#include <glusterfs/events.h>
void
afr_heal_synctask(xlator_t *this, afr_local_t *local);
@@ -21,7 +21,7 @@ afr_heal_synctask(xlator_t *this, afr_local_t *local);
int
afr_lookup_and_heal_gfid(xlator_t *this, inode_t *parent, const char *name,
inode_t *inode, struct afr_reply *replies, int source,
- unsigned char *sources, void *gfid)
+ unsigned char *sources, void *gfid, int *gfid_idx)
{
afr_private_t *priv = NULL;
call_frame_t *frame = NULL;
@@ -37,30 +37,54 @@ afr_lookup_and_heal_gfid(xlator_t *this, inode_t *parent, const char *name,
priv = this->private;
wind_on = alloca0(priv->child_count);
- ia_type = replies[source].poststat.ia_type;
- if ((ia_type == IA_INVAL) &&
- (AFR_COUNT(sources, priv->child_count) == priv->child_count)) {
- /* If a file is present on some bricks of the replica but parent
- * dir does not have pending xattrs, all bricks are sources and
- * the 'source' we selected earlier might be one where the file
- * is not actually present. Hence check if file is present in
- * any of the sources.*/
- for (i = 0; i < priv->child_count; i++) {
+ if (source >= 0 && replies[source].valid && replies[source].op_ret == 0)
+ ia_type = replies[source].poststat.ia_type;
+
+ if (ia_type != IA_INVAL)
+ goto heal;
+
+ /* If ia_type is still invalid, it means either
+ * (a)'source' was -1, i.e. parent dir pending xattrs are in split-brain
+ * (or) (b) The parent dir pending xattrs are all zeroes (i.e. all bricks
+ * are sources) and the 'source' we selected earlier might be the one where
+ * the file is not actually present.
+ *
+ * In both cases, let us pick a brick with a successful reply and use its
+ * ia_type.
+ * */
+ for (i = 0; i < priv->child_count; i++) {
+ if (source == -1) {
+ /* case (a) above. */
+ if (replies[i].valid && replies[i].op_ret == 0 &&
+ replies[i].poststat.ia_type != IA_INVAL) {
+ ia_type = replies[i].poststat.ia_type;
+ break;
+ }
+ } else {
+ /* case (b) above. */
if (i == source)
continue;
- if (sources[i] && replies[i].valid && replies[i].op_ret == 0) {
+ if (sources[i] && replies[i].valid && replies[i].op_ret == 0 &&
+ replies[i].poststat.ia_type != IA_INVAL) {
ia_type = replies[i].poststat.ia_type;
break;
}
}
}
+heal:
/* gfid heal on those subvolumes that do not have gfid associated
* with the inode and update those replies.
*/
for (i = 0; i < priv->child_count; i++) {
if (!replies[i].valid || replies[i].op_ret != 0)
continue;
+
+ if (gf_uuid_is_null(gfid) &&
+ !gf_uuid_is_null(replies[i].poststat.ia_gfid) &&
+ replies[i].poststat.ia_type == ia_type)
+ gfid = replies[i].poststat.ia_gfid;
+
if (!gf_uuid_is_null(replies[i].poststat.ia_gfid) ||
replies[i].poststat.ia_type != ia_type)
continue;
@@ -103,7 +127,22 @@ afr_lookup_and_heal_gfid(xlator_t *this, inode_t *parent, const char *name,
afr_reply_wipe(&replies[i]);
afr_reply_copy(&replies[i], &local->replies[i]);
}
+ if (gfid_idx && (*gfid_idx == -1)) {
+ /*Pick a brick where the gfid heal was successful.*/
+ for (i = 0; i < priv->child_count; i++) {
+ if (!wind_on[i])
+ continue;
+ if (replies[i].valid && replies[i].op_ret == 0 &&
+ !gf_uuid_is_null(replies[i].poststat.ia_gfid)) {
+ *gfid_idx = i;
+ break;
+ }
+ }
+ }
out:
+ if (gfid_idx && (*gfid_idx == -1) && (ret == 0) && local) {
+ ret = -afr_final_errno(local, priv);
+ }
loc_wipe(&loc);
if (frame)
AFR_STACK_DESTROY(frame);
@@ -136,27 +175,23 @@ afr_selfheal_gfid_mismatch_by_majority(struct afr_reply *replies,
{
int j = 0;
int i = 0;
- int src = -1;
- int votes[child_count];
+ int votes;
for (i = 0; i < child_count; i++) {
if (!replies[i].valid || replies[i].op_ret == -1)
continue;
- votes[i] = 1;
+ votes = 1;
for (j = i + 1; j < child_count; j++) {
if ((!gf_uuid_compare(replies[i].poststat.ia_gfid,
replies[j].poststat.ia_gfid)))
- votes[i]++;
- if (votes[i] > child_count / 2) {
- src = i;
- goto out;
- }
+ votes++;
+ if (votes > child_count / 2)
+ return i;
}
}
-out:
- return src;
+ return -1;
}
int
@@ -232,10 +267,8 @@ afr_gfid_split_brain_source(xlator_t *this, struct afr_reply *replies,
"All the bricks should be up to resolve the gfid split "
"barin");
if (xdata) {
- ret = dict_set_str(xdata, "gfid-heal-msg",
- "All the "
- "bricks should be up to resolve the"
- " gfid split barin");
+ ret = dict_set_sizen_str_sizen(xdata, "gfid-heal-msg",
+ SALL_BRICKS_UP_TO_RESOLVE);
if (ret)
gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_DICT_SET_FAILED,
"Error setting"
@@ -245,7 +278,7 @@ afr_gfid_split_brain_source(xlator_t *this, struct afr_reply *replies,
}
if (xdata) {
- ret = dict_get_int32(xdata, "heal-op", &heal_op);
+ ret = dict_get_int32_sizen(xdata, "heal-op", &heal_op);
if (ret)
goto fav_child;
} else {
@@ -258,10 +291,10 @@ afr_gfid_split_brain_source(xlator_t *this, struct afr_reply *replies,
priv->child_count);
if (*src == -1) {
gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_SPLIT_BRAIN,
- "No bigger file");
+ SNO_BIGGER_FILE);
if (xdata) {
- ret = dict_set_str(xdata, "gfid-heal-msg",
- "No bigger file");
+ ret = dict_set_sizen_str_sizen(xdata, "gfid-heal-msg",
+ SNO_BIGGER_FILE);
if (ret)
gf_msg(this->name, GF_LOG_ERROR, 0,
AFR_MSG_DICT_SET_FAILED,
@@ -276,10 +309,10 @@ afr_gfid_split_brain_source(xlator_t *this, struct afr_reply *replies,
priv->child_count);
if (*src == -1) {
gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_SPLIT_BRAIN,
- "No difference in mtime");
+ SNO_DIFF_IN_MTIME);
if (xdata) {
- ret = dict_set_str(xdata, "gfid-heal-msg",
- "No difference in mtime");
+ ret = dict_set_sizen_str_sizen(xdata, "gfid-heal-msg",
+ SNO_DIFF_IN_MTIME);
if (ret)
gf_msg(this->name, GF_LOG_ERROR, 0,
AFR_MSG_DICT_SET_FAILED,
@@ -290,7 +323,7 @@ afr_gfid_split_brain_source(xlator_t *this, struct afr_reply *replies,
break;
case GF_SHD_OP_SBRAIN_HEAL_FROM_BRICK:
- ret = dict_get_str(xdata, "child-name", &src_brick);
+ ret = dict_get_str_sizen(xdata, "child-name", &src_brick);
if (ret) {
gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_SPLIT_BRAIN,
"Error getting the source "
@@ -301,12 +334,10 @@ afr_gfid_split_brain_source(xlator_t *this, struct afr_reply *replies,
src_brick);
if (*src == -1) {
gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_SPLIT_BRAIN,
- "Error getting the source "
- "brick");
+ SERROR_GETTING_SRC_BRICK);
if (xdata) {
- ret = dict_set_str(xdata, "gfid-heal-msg",
- "Error getting the source "
- "brick");
+ ret = dict_set_sizen_str_sizen(xdata, "gfid-heal-msg",
+ SERROR_GETTING_SRC_BRICK);
if (ret)
gf_msg(this->name, GF_LOG_ERROR, 0,
AFR_MSG_DICT_SET_FAILED,
@@ -360,11 +391,12 @@ out:
uuid_utoa_r(replies[src_idx].poststat.ia_gfid, g2),
priv->children[src_idx]->name);
gf_event(EVENT_AFR_SPLIT_BRAIN,
+ "client-pid=%d;"
"subvol=%s;type=gfid;file="
"<gfid:%s>/%s>;count=2;child-%d=%s;gfid-%d=%s;"
"child-%d=%s;gfid-%d=%s",
- this->name, uuid_utoa(pargfid), bname, child_idx,
- priv->children[child_idx]->name, child_idx,
+ this->ctx->cmd_args.client_pid, this->name, uuid_utoa(pargfid),
+ bname, child_idx, priv->children[child_idx]->name, child_idx,
uuid_utoa_r(replies[child_idx].poststat.ia_gfid, g1), src_idx,
priv->children[src_idx]->name, src_idx,
uuid_utoa_r(replies[src_idx].poststat.ia_gfid, g2));
@@ -481,7 +513,8 @@ afr_selfheal_restore_time(call_frame_t *frame, xlator_t *this, inode_t *inode,
AFR_ONLIST(healed_sinks, frame, afr_sh_generic_fop_cbk, setattr, &loc,
&replies[source].poststat,
- (GF_SET_ATTR_ATIME | GF_SET_ATTR_MTIME), NULL);
+ (GF_SET_ATTR_ATIME | GF_SET_ATTR_MTIME | GF_SET_ATTR_CTIME),
+ NULL);
loc_wipe(&loc);
@@ -878,7 +911,7 @@ afr_dict_contains_heal_op(call_frame_t *frame)
local = frame->local;
xdata_req = local->xdata_req;
- ret = dict_get_int32(xdata_req, "heal-op", &heal_op);
+ ret = dict_get_int32_sizen(xdata_req, "heal-op", &heal_op);
if (ret)
return _gf_false;
if (local->xdata_rsp == NULL) {
@@ -886,8 +919,8 @@ afr_dict_contains_heal_op(call_frame_t *frame)
if (!local->xdata_rsp)
return _gf_true;
}
- ret = dict_set_str(local->xdata_rsp, "sh-fail-msg",
- "File not in split-brain");
+ ret = dict_set_sizen_str_sizen(local->xdata_rsp, "sh-fail-msg",
+ SFILE_NOT_IN_SPLIT_BRAIN);
return _gf_true;
}
@@ -941,7 +974,8 @@ afr_mark_split_brain_source_sinks_by_heal_op(
xdata_rsp = local->xdata_rsp;
if (!afr_can_decide_split_brain_source_sinks(replies, priv->child_count)) {
- ret = dict_set_str(xdata_rsp, "sh-fail-msg", SBRAIN_HEAL_NO_GO_MSG);
+ ret = dict_set_sizen_str_sizen(xdata_rsp, "sh-fail-msg",
+ SBRAIN_HEAL_NO_GO_MSG);
ret = -1;
goto out;
}
@@ -952,16 +986,16 @@ afr_mark_split_brain_source_sinks_by_heal_op(
switch (heal_op) {
case GF_SHD_OP_SBRAIN_HEAL_FROM_BIGGER_FILE:
if (type == AFR_METADATA_TRANSACTION) {
- ret = dict_set_str(xdata_rsp, "sh-fail-msg",
- "Use source-brick option to"
- " heal metadata split-brain");
+ ret = dict_set_sizen_str_sizen(xdata_rsp, "sh-fail-msg",
+ SUSE_SOURCE_BRICK_TO_HEAL);
if (!ret)
ret = -1;
goto out;
}
afr_mark_largest_file_as_source(this, sources, replies);
if (AFR_COUNT(sources, priv->child_count) != 1) {
- ret = dict_set_str(xdata_rsp, "sh-fail-msg", "No bigger file");
+ ret = dict_set_sizen_str_sizen(xdata_rsp, "sh-fail-msg",
+ SNO_BIGGER_FILE);
if (!ret)
ret = -1;
goto out;
@@ -969,36 +1003,36 @@ afr_mark_split_brain_source_sinks_by_heal_op(
break;
case GF_SHD_OP_SBRAIN_HEAL_FROM_LATEST_MTIME:
if (type == AFR_METADATA_TRANSACTION) {
- ret = dict_set_str(xdata_rsp, "sh-fail-msg",
- "Use source-brick option to"
- " heal metadata split-brain");
+ ret = dict_set_sizen_str_sizen(xdata_rsp, "sh-fail-msg",
+ SUSE_SOURCE_BRICK_TO_HEAL);
if (!ret)
ret = -1;
goto out;
}
afr_mark_latest_mtime_file_as_source(this, sources, replies);
if (AFR_COUNT(sources, priv->child_count) != 1) {
- ret = dict_set_str(xdata_rsp, "sh-fail-msg",
- "No difference in mtime");
+ ret = dict_set_sizen_str_sizen(xdata_rsp, "sh-fail-msg",
+ SNO_DIFF_IN_MTIME);
if (!ret)
ret = -1;
goto out;
}
break;
case GF_SHD_OP_SBRAIN_HEAL_FROM_BRICK:
- ret = dict_get_str(xdata_req, "child-name", &name);
+ ret = dict_get_str_sizen(xdata_req, "child-name", &name);
if (ret)
goto out;
source = afr_get_child_index_from_name(this, name);
if (source < 0) {
- ret = dict_set_str(xdata_rsp, "sh-fail-msg",
- "Invalid brick name");
+ ret = dict_set_sizen_str_sizen(xdata_rsp, "sh-fail-msg",
+ SINVALID_BRICK_NAME);
if (!ret)
ret = -1;
goto out;
}
if (locked_on[source] != 1) {
- ret = dict_set_str(xdata_rsp, "sh-fail-msg", "Brick is not up");
+ ret = dict_set_sizen_str_sizen(xdata_rsp, "sh-fail-msg",
+ SBRICK_IS_NOT_UP);
if (!ret)
ret = -1;
goto out;
@@ -1139,7 +1173,8 @@ afr_sh_fav_by_ctime(xlator_t *this, struct afr_reply *replies, inode_t *inode)
}
/*
- * afr_sh_fav_by_size: Choose favorite child by size.
+ * afr_sh_fav_by_size: Choose favorite child by size
+ * when not all files are of zero size.
*/
int
afr_sh_fav_by_size(xlator_t *this, struct afr_reply *replies, inode_t *inode)
@@ -1150,19 +1185,32 @@ afr_sh_fav_by_size(xlator_t *this, struct afr_reply *replies, inode_t *inode)
uint64_t cmp_sz = 0;
priv = this->private;
-
for (i = 0; i < priv->child_count; i++) {
- if (replies[i].valid == 1) {
- gf_msg_debug(this->name, 0,
- "Child:%s file size = %" PRIu64 " for gfid %s",
- priv->children[i]->name, replies[i].poststat.ia_size,
- uuid_utoa(inode->gfid));
- if (replies[i].poststat.ia_size > cmp_sz) {
- cmp_sz = replies[i].poststat.ia_size;
- fav_child = i;
- }
+ if (!replies[i].valid) {
+ continue;
+ }
+ gf_msg_debug(this->name, 0,
+ "Child:%s file size = %" PRIu64 " for gfid %s",
+ priv->children[i]->name, replies[i].poststat.ia_size,
+ uuid_utoa(inode->gfid));
+ if (replies[i].poststat.ia_type == IA_IFDIR) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_SBRAIN_FAV_CHILD_POLICY,
+ "Cannot perform selfheal on %s. "
+ "Size policy is not applicable to directories.",
+ uuid_utoa(inode->gfid));
+ break;
+ }
+ if (replies[i].poststat.ia_size > cmp_sz) {
+ cmp_sz = replies[i].poststat.ia_size;
+ fav_child = i;
+ } else if (replies[i].poststat.ia_size == cmp_sz) {
+ fav_child = -1;
}
}
+ if (fav_child == -1) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_SPLIT_BRAIN,
+ "No bigger file");
+ }
return fav_child;
}
@@ -1228,7 +1276,10 @@ afr_mark_split_brain_source_sinks_by_policy(
priv = this->private;
fav_child = afr_sh_get_fav_by_policy(this, replies, inode, &policy_str);
- if (fav_child > priv->child_count - 1) {
+ if (fav_child == -1) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_SBRAIN_FAV_CHILD_POLICY,
+ "No child selected by favorite-child policy.");
+ } else if (fav_child > priv->child_count - 1) {
gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_SBRAIN_FAV_CHILD_POLICY,
"Invalid child (%d) "
"selected by policy %s.",
@@ -1359,7 +1410,7 @@ afr_mark_split_brain_source_sinks(
if (source >= 0)
return source;
- ret = dict_get_int32(xdata_req, "heal-op", &heal_op);
+ ret = dict_get_int32_sizen(xdata_req, "heal-op", &heal_op);
if (ret)
goto autoheal;
@@ -1375,7 +1426,7 @@ autoheal:
frame, this, inode, sources, sinks, healed_sinks, locked_on,
replies, type);
if (source != -1) {
- ret = dict_set_int32(xdata_req, "fav-child-policy", 1);
+ ret = dict_set_int32_sizen(xdata_req, "fav-child-policy", 1);
if (ret)
return -1;
}
@@ -1406,7 +1457,7 @@ _afr_fav_child_reset_sink_xattrs(call_frame_t *frame, xlator_t *this,
priv = this->private;
local = frame->local;
- if (!dict_get(local->xdata_req, "fav-child-policy"))
+ if (!dict_get_sizen(local->xdata_req, "fav-child-policy"))
return 0;
xdata = dict_new();
@@ -1524,7 +1575,6 @@ afr_selfheal_find_direction(call_frame_t *frame, xlator_t *this,
char *accused = NULL; /* Accused others without any self-accusal */
char *pending = NULL; /* Have pending operations on others */
char *self_accused = NULL; /* Accused itself */
- int min_participants = -1;
priv = this->private;
@@ -1548,12 +1598,7 @@ afr_selfheal_find_direction(call_frame_t *frame, xlator_t *this,
}
}
- if (type == AFR_DATA_TRANSACTION) {
- min_participants = priv->child_count;
- } else {
- min_participants = AFR_SH_MIN_PARTICIPANTS;
- }
- if (afr_success_count(replies, priv->child_count) < min_participants) {
+ if (afr_success_count(replies, priv->child_count) < priv->child_count) {
/* Treat this just like locks not being acquired */
return -ENOTCONN;
}
@@ -1614,7 +1659,7 @@ afr_selfheal_find_direction(call_frame_t *frame, xlator_t *this,
}
}
- if (type == AFR_DATA_TRANSACTION)
+ if (type == AFR_DATA_TRANSACTION || type == AFR_METADATA_TRANSACTION)
afr_selfheal_post_op_failure_accounting(priv, accused, sources,
locked_on);
@@ -1722,11 +1767,9 @@ afr_selfheal_discover_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
if (xdata) {
local->replies[i].xdata = dict_ref(xdata);
ret = dict_get_int8(xdata, "link-count", &need_heal);
- local->replies[i].need_heal = need_heal;
- } else {
- local->replies[i].need_heal = need_heal;
}
+ local->replies[i].need_heal = need_heal;
syncbarrier_wake(&local->barrier);
return 0;
@@ -1782,10 +1825,41 @@ afr_selfheal_unlocked_lookup_on(call_frame_t *frame, inode_t *parent,
return inode;
}
+/*
+ * Fill @dict with the keys that request multiple-domain inodelk counts:
+ * the GLUSTERFS_MULTIPLE_DOM_LK_CNT_REQUESTS key plus one
+ * "<GLUSTERFS_INODELK_DOM_PREFIX>:<domain>" key each for this->name and for
+ * priv->sh_domain. Every key is stored with the uint32 value 1.
+ *
+ * Returns 0 on success, otherwise the non-zero result of the first
+ * dict_set_uint32() call that failed.
+ */
+static int
+afr_set_multi_dom_lock_count_request(xlator_t *this, dict_t *dict)
+{
+    afr_private_t *priv = this->private;
+    size_t prefix_len = strlen(GLUSTERFS_INODELK_DOM_PREFIX);
+    char *name_dom_key = NULL;
+    char *sh_dom_key = NULL;
+    int ret = 0;
+
+    /* "+ 2" leaves room for the ':' separator and the terminating NUL. */
+    name_dom_key = alloca0(prefix_len + 2 + strlen(this->name));
+    sh_dom_key = alloca0(prefix_len + 2 + strlen(priv->sh_domain));
+
+    ret = dict_set_uint32(dict, GLUSTERFS_MULTIPLE_DOM_LK_CNT_REQUESTS, 1);
+    if (ret)
+        return ret;
+
+    sprintf(name_dom_key, "%s:%s", GLUSTERFS_INODELK_DOM_PREFIX, this->name);
+    ret = dict_set_uint32(dict, name_dom_key, 1);
+    if (ret)
+        return ret;
+
+    sprintf(sh_dom_key, "%s:%s", GLUSTERFS_INODELK_DOM_PREFIX,
+            priv->sh_domain);
+    return dict_set_uint32(dict, sh_dom_key, 1);
+}
+
int
afr_selfheal_unlocked_discover_on(call_frame_t *frame, inode_t *inode,
uuid_t gfid, struct afr_reply *replies,
- unsigned char *discover_on)
+ unsigned char *discover_on, dict_t *dict)
{
loc_t loc = {
0,
@@ -1800,12 +1874,19 @@ afr_selfheal_unlocked_discover_on(call_frame_t *frame, inode_t *inode,
xattr_req = dict_new();
if (!xattr_req)
return -ENOMEM;
+ if (dict)
+ dict_copy(dict, xattr_req);
if (afr_xattr_req_prepare(frame->this, xattr_req) != 0) {
dict_unref(xattr_req);
return -ENOMEM;
}
+ if (afr_set_multi_dom_lock_count_request(frame->this, xattr_req)) {
+ dict_unref(xattr_req);
+ return -1;
+ }
+
loc.inode = inode_ref(inode);
gf_uuid_copy(loc.gfid, gfid);
@@ -1824,12 +1905,16 @@ int
afr_selfheal_unlocked_discover(call_frame_t *frame, inode_t *inode, uuid_t gfid,
struct afr_reply *replies)
{
- afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ dict_t *dict = NULL;
- priv = frame->this->private;
+ local = frame->local;
+
+ if (local->xattr_req)
+ dict = local->xattr_req;
return afr_selfheal_unlocked_discover_on(frame, inode, gfid, replies,
- priv->child_up);
+ local->child_up, dict);
}
unsigned int
@@ -2196,7 +2281,8 @@ int
afr_selfheal_unlocked_inspect(call_frame_t *frame, xlator_t *this, uuid_t gfid,
inode_t **link_inode, gf_boolean_t *data_selfheal,
gf_boolean_t *metadata_selfheal,
- gf_boolean_t *entry_selfheal)
+ gf_boolean_t *entry_selfheal,
+ struct afr_reply *replies_dst)
{
afr_private_t *priv = NULL;
inode_t *inode = NULL;
@@ -2255,11 +2341,13 @@ afr_selfheal_unlocked_inspect(call_frame_t *frame, xlator_t *this, uuid_t gfid,
priv->children[i]->name,
uuid_utoa(replies[i].poststat.ia_gfid));
gf_event(EVENT_AFR_SPLIT_BRAIN,
+ "client-pid=%d;"
"subvol=%s;"
"type=file;gfid=%s;"
"ia_type-%d=%s;ia_type-%d=%s",
- this->name, uuid_utoa(replies[i].poststat.ia_gfid),
- first_idx, gf_inode_type_to_str(first.ia_type), i,
+ this->ctx->cmd_args.client_pid, this->name,
+ uuid_utoa(replies[i].poststat.ia_gfid), first_idx,
+ gf_inode_type_to_str(first.ia_type), i,
gf_inode_type_to_str(replies[i].poststat.ia_type));
ret = -EIO;
goto out;
@@ -2330,6 +2418,8 @@ afr_selfheal_unlocked_inspect(call_frame_t *frame, xlator_t *this, uuid_t gfid,
ret = 0;
out:
+ if (replies && replies_dst)
+ afr_replies_copy(replies_dst, replies, priv->child_count);
if (inode)
inode_unref(inode);
if (replies)
@@ -2369,8 +2459,11 @@ afr_frame_create(xlator_t *this, int32_t *op_errno)
pid_t pid = GF_CLIENT_PID_SELF_HEALD;
frame = create_frame(this, this->ctx->pool);
- if (!frame)
+ if (!frame) {
+ if (op_errno)
+ *op_errno = ENOMEM;
return NULL;
+ }
local = AFR_FRAME_INIT(frame, (*op_errno));
if (!local) {
@@ -2441,17 +2534,12 @@ afr_selfheal_do(call_frame_t *frame, xlator_t *this, uuid_t gfid)
gf_boolean_t metadata_selfheal = _gf_false;
gf_boolean_t entry_selfheal = _gf_false;
afr_private_t *priv = NULL;
- gf_boolean_t dataheal_enabled = _gf_false;
priv = this->private;
- ret = gf_string2boolean(priv->data_self_heal, &dataheal_enabled);
- if (ret)
- goto out;
-
ret = afr_selfheal_unlocked_inspect(frame, this, gfid, &inode,
&data_selfheal, &metadata_selfheal,
- &entry_selfheal);
+ &entry_selfheal, NULL);
if (ret)
goto out;
@@ -2468,7 +2556,7 @@ afr_selfheal_do(call_frame_t *frame, xlator_t *this, uuid_t gfid)
}
}
- if (data_selfheal && dataheal_enabled)
+ if (data_selfheal && priv->data_self_heal)
data_ret = afr_selfheal_data(frame, this, fd);
if (metadata_selfheal && priv->metadata_self_heal)
@@ -2662,3 +2750,185 @@ afr_choose_source_by_policy(afr_private_t *priv, unsigned char *sources,
out:
return source;
}
+
+/*
+ * mkdir callback wound by afr_anon_inode_create(). @cookie carries the child
+ * index; the outcome of the mkdir is stored in that child's slot of
+ * local->replies and the waiter on local->barrier is woken. A single
+ * successful reply is enough to set local->op_ret to 0.
+ */
+static int
+afr_anon_inode_mkdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+                         int32_t op_ret, int32_t op_errno, inode_t *inode,
+                         struct iatt *buf, struct iatt *preparent,
+                         struct iatt *postparent, dict_t *xdata)
+{
+    afr_local_t *local = frame->local;
+    int child_index = (long)cookie;
+    struct afr_reply *reply = &local->replies[child_index];
+
+    reply->valid = 1;
+    reply->op_ret = op_ret;
+    reply->op_errno = op_errno;
+
+    /* The iatt arguments are only copied for a successful mkdir. */
+    if (op_ret == 0) {
+        local->op_ret = 0;
+        reply->poststat = *buf;
+        reply->preparent = *preparent;
+        reply->postparent = *postparent;
+    }
+
+    if (xdata)
+        reply->xdata = dict_ref(xdata);
+
+    syncbarrier_wake(&local->barrier);
+    return 0;
+}
+
+int
+afr_anon_inode_create(xlator_t *this, int child, inode_t **linked_inode)
+{
+ call_frame_t *frame = NULL;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = this->private;
+ unsigned char *mkdir_on = alloca0(priv->child_count); /* per-child flag: wind mkdir */
+ unsigned char *lookup_on = alloca0(priv->child_count); /* per-child flag: wind lookup */
+ loc_t loc = {0};
+ int32_t op_errno = 0; /* overall error; reported only if 'child' has none */
+ int32_t child_op_errno = 0; /* error recorded specifically for brick 'child' */
+ struct iatt iatt = {0}; /* attributes later passed to inode_link() */
+ dict_t *xdata = NULL;
+ uuid_t anon_inode_gfid = {0};
+ int mkdir_count = 0;
+ int i = 0;
+
+ /* Make sure the directory priv->anon_inode_name, with the gfid parsed from
+ * priv->anon_gfid_str, exists under the root of this->itable, link it
+ * into the inode table and return it through *linked_inode.
+ *
+ * Try to mkdir everywhere and return success if the dir exists on 'child'.
+ *
+ * Returns 0 on success, otherwise the negated error; an error recorded
+ * for 'child' takes precedence over any other error (see 'out').
+ *
+ * NOTE(review): *linked_inode is read at 'out' even on paths that never
+ * assign it (e.g. the use_anon_inode check below), so callers presumably
+ * pass it in initialised to NULL - confirm against the callers.
+ */
+
+ if (!priv->use_anon_inode) {
+ op_errno = EINVAL;
+ goto out;
+ }
+
+ frame = afr_frame_create(this, &op_errno);
+ if (op_errno) { /* frame is dereferenced below only when op_errno stayed 0 */
+ goto out;
+ }
+ local = frame->local;
+ if (!local->child_up[child]) {
+ /*Other bricks may need mkdir so don't error out yet*/
+ child_op_errno = ENOTCONN;
+ }
+ gf_uuid_parse(priv->anon_gfid_str, anon_inode_gfid);
+ for (i = 0; i < priv->child_count; i++) {
+ if (!local->child_up[i])
+ continue;
+
+ if (priv->anon_inode[i]) { /* already marked as present on this brick */
+ mkdir_on[i] = 0;
+ } else {
+ mkdir_on[i] = 1;
+ mkdir_count++;
+ }
+ }
+
+ if (mkdir_count == 0) { /* no up brick needs a mkdir: try the inode table first */
+ *linked_inode = inode_find(this->itable, anon_inode_gfid);
+ if (*linked_inode) {
+ op_errno = 0;
+ goto out;
+ }
+ }
+
+ loc.parent = inode_ref(this->itable->root);
+ loc.name = priv->anon_inode_name;
+ loc.inode = inode_new(this->itable);
+ if (!loc.inode) {
+ op_errno = ENOMEM;
+ goto out;
+ }
+
+ xdata = dict_new();
+ if (!xdata) {
+ op_errno = ENOMEM;
+ goto out;
+ }
+
+ op_errno = -dict_set_gfuuid(xdata, "gfid-req", anon_inode_gfid, _gf_true);
+ if (op_errno) {
+ goto out;
+ }
+
+ if (mkdir_count == 0) { /* nothing to create, but inode_find() missed: look up on all up bricks */
+ memcpy(lookup_on, local->child_up, priv->child_count);
+ goto lookup;
+ }
+
+ AFR_ONLIST(mkdir_on, frame, afr_anon_inode_mkdir_cbk, mkdir, &loc, 0755, 0,
+ xdata);
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!mkdir_on[i]) {
+ continue;
+ }
+
+ if (local->replies[i].op_ret == 0) {
+ priv->anon_inode[i] = 1; /* skip mkdir on this brick in later calls */
+ iatt = local->replies[i].poststat;
+ } else if (local->replies[i].op_ret < 0 &&
+ local->replies[i].op_errno == EEXIST) {
+ lookup_on[i] = 1; /* name already exists: check its gfid via lookup */
+ } else if (i == child) {
+ child_op_errno = local->replies[i].op_errno;
+ }
+ }
+
+ if (AFR_COUNT(lookup_on, priv->child_count) == 0) {
+ goto link;
+ }
+
+lookup:
+ AFR_ONLIST(lookup_on, frame, afr_selfheal_discover_cbk, lookup, &loc,
+ xdata);
+ for (i = 0; i < priv->child_count; i++) {
+ if (!lookup_on[i]) {
+ continue;
+ }
+
+ if (local->replies[i].op_ret == 0) {
+ if (gf_uuid_compare(anon_inode_gfid,
+ local->replies[i].poststat.ia_gfid) == 0) {
+ priv->anon_inode[i] = 1;
+ iatt = local->replies[i].poststat;
+ } else {
+ /* an entry with this name but a different gfid is on the brick */
+ if (i == child)
+ child_op_errno = EINVAL;
+ gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_INVALID_DATA,
+ "%s has gfid: %s", priv->anon_inode_name,
+ uuid_utoa(local->replies[i].poststat.ia_gfid));
+ }
+ } else if (i == child) {
+ child_op_errno = local->replies[i].op_errno;
+ }
+ }
+link:
+ /* iatt has a gfid only if a mkdir succeeded or a lookup matched above */
+ if (!gf_uuid_is_null(iatt.ia_gfid)) {
+ *linked_inode = inode_link(loc.inode, loc.parent, loc.name, &iatt);
+ if (*linked_inode) {
+ op_errno = 0;
+ inode_lookup(*linked_inode);
+ } else {
+ op_errno = ENOMEM;
+ }
+ goto out;
+ }
+
+out:
+ if (xdata)
+ dict_unref(xdata);
+ loc_wipe(&loc);
+ /*child_op_errno takes precedence; fall back to op_errno only when it is 0*/
+ if (child_op_errno == 0) {
+ child_op_errno = op_errno;
+ }
+
+ /* on failure, drop the reference held in *linked_inode and clear it */
+ if (child_op_errno && *linked_inode) {
+ inode_unref(*linked_inode);
+ *linked_inode = NULL;
+ }
+ if (frame)
+ AFR_STACK_DESTROY(frame);
+ return -child_op_errno;
+}
diff --git a/xlators/cluster/afr/src/afr-self-heal-data.c b/xlators/cluster/afr/src/afr-self-heal-data.c
index 8cd79c73aa4..37bcc2b3f9e 100644
--- a/xlators/cluster/afr/src/afr-self-heal-data.c
+++ b/xlators/cluster/afr/src/afr-self-heal-data.c
@@ -10,15 +10,10 @@
#include "afr.h"
#include "afr-self-heal.h"
-#include "byte-order.h"
+#include <glusterfs/byte-order.h>
#include "protocol-common.h"
#include "afr-messages.h"
-#include "events.h"
-
-enum {
- AFR_SELFHEAL_DATA_FULL = 0,
- AFR_SELFHEAL_DATA_DIFF,
-};
+#include <glusterfs/events.h>
#define HAS_HOLES(i) ((i->ia_blocks * 512) < (i->ia_size))
static int
@@ -73,7 +68,7 @@ __afr_can_skip_data_block_heal(call_frame_t *frame, xlator_t *this, fd_t *fd,
xdata = dict_new();
if (!xdata)
goto out;
- if (dict_set_int32(xdata, "check-zero-filled", 1)) {
+ if (dict_set_int32_sizen(xdata, "check-zero-filled", 1)) {
dict_unref(xdata);
goto out;
}
@@ -230,24 +225,40 @@ __afr_selfheal_data_read_write(call_frame_t *frame, xlator_t *this, fd_t *fd,
return ret;
}
+/*
+ * Report whether the lock set in @locked_on is usable for healing: the
+ * @source brick must be locked, and so must at least one brick that is
+ * flagged in @healed_sinks. Returns _gf_true in that case, _gf_false
+ * otherwise.
+ */
+static gf_boolean_t
+afr_source_sinks_locked(xlator_t *this, unsigned char *locked_on, int source,
+                        unsigned char *healed_sinks)
+{
+    afr_private_t *priv = this->private;
+    gf_boolean_t sink_locked = _gf_false;
+    int child = 0;
+
+    if (locked_on[source]) {
+        /* Stop at the first sink that also holds the lock. */
+        while (child < priv->child_count && !sink_locked) {
+            if (healed_sinks[child] && locked_on[child])
+                sink_locked = _gf_true;
+            child++;
+        }
+    }
+
+    return sink_locked;
+}
+
static int
afr_selfheal_data_block(call_frame_t *frame, xlator_t *this, fd_t *fd,
int source, unsigned char *healed_sinks, off_t offset,
size_t size, int type, struct afr_reply *replies)
{
int ret = -1;
- int sink_count = 0;
afr_private_t *priv = NULL;
unsigned char *data_lock = NULL;
priv = this->private;
- sink_count = AFR_COUNT(healed_sinks, priv->child_count);
data_lock = alloca0(priv->child_count);
ret = afr_selfheal_inodelk(frame, this, fd->inode, this->name, offset, size,
data_lock);
{
- if (ret < sink_count) {
+ if (!afr_source_sinks_locked(this, data_lock, source, healed_sinks)) {
ret = -ENOTCONN;
goto unlock;
}
@@ -301,7 +312,7 @@ afr_data_self_heal_type_get(afr_private_t *priv, unsigned char *healed_sinks,
int type = AFR_SELFHEAL_DATA_FULL;
int i = 0;
- if (priv->data_self_heal_algorithm == NULL) {
+ if (priv->data_self_heal_algorithm == AFR_SELFHEAL_DATA_DYNAMIC) {
type = AFR_SELFHEAL_DATA_FULL;
for (i = 0; i < priv->child_count; i++) {
if (!healed_sinks[i] && i != source)
@@ -311,10 +322,8 @@ afr_data_self_heal_type_get(afr_private_t *priv, unsigned char *healed_sinks,
break;
}
}
- } else if (strcmp(priv->data_self_heal_algorithm, "full") == 0) {
- type = AFR_SELFHEAL_DATA_FULL;
- } else if (strcmp(priv->data_self_heal_algorithm, "diff") == 0) {
- type = AFR_SELFHEAL_DATA_DIFF;
+ } else {
+ type = priv->data_self_heal_algorithm;
}
return type;
}
@@ -331,6 +340,9 @@ afr_selfheal_data_do(call_frame_t *frame, xlator_t *this, fd_t *fd, int source,
call_frame_t *iter_frame = NULL;
unsigned char arbiter_sink_status = 0;
+ gf_msg(this->name, GF_LOG_INFO, 0, AFR_MSG_SELF_HEAL_INFO,
+ "performing data selfheal on %s", uuid_utoa(fd->inode->gfid));
+
priv = this->private;
if (priv->arbiter_count) {
arbiter_sink_status = healed_sinks[ARBITER_BRICK_INDEX];
@@ -382,17 +394,18 @@ __afr_selfheal_truncate_sinks(call_frame_t *frame, xlator_t *this, fd_t *fd,
{
afr_local_t *local = NULL;
afr_private_t *priv = NULL;
- unsigned char arbiter_sink_status = 0;
int i = 0;
local = frame->local;
priv = this->private;
- if (priv->arbiter_count) {
- arbiter_sink_status = healed_sinks[ARBITER_BRICK_INDEX];
- healed_sinks[ARBITER_BRICK_INDEX] = 0;
- }
-
+ /* This will send truncate on the arbiter brick as well if it is marked as
+ * sink. If changelog is enabled on the volume, it captures truncate as a
+ * data transaction on the arbiter brick. This helps geo-rep to properly
+ * sync the data from master to slave if the arbiter is the ACTIVE brick
+ * during syncing and had some entries healed for data as part of
+ * self-heal.
+ */
AFR_ONLIST(healed_sinks, frame, afr_sh_generic_fop_cbk, ftruncate, fd, size,
NULL);
@@ -403,8 +416,6 @@ __afr_selfheal_truncate_sinks(call_frame_t *frame, xlator_t *this, fd_t *fd,
*/
healed_sinks[i] = 0;
- if (arbiter_sink_status)
- healed_sinks[ARBITER_BRICK_INDEX] = arbiter_sink_status;
return 0;
}
@@ -545,9 +556,11 @@ __afr_selfheal_data_finalize_source(
replies, AFR_DATA_TRANSACTION);
if (source < 0) {
gf_event(EVENT_AFR_SPLIT_BRAIN,
+ "client-pid=%d;"
"subvol=%s;type=data;"
"file=%s",
- this->name, uuid_utoa(inode->gfid));
+ this->ctx->cmd_args.client_pid, this->name,
+ uuid_utoa(inode->gfid));
return -EIO;
}
@@ -700,19 +713,18 @@ __afr_selfheal_data(call_frame_t *frame, xlator_t *this, fd_t *fd,
goto unlock;
}
- if (priv->arbiter_count &&
- AFR_COUNT(healed_sinks, priv->child_count) == 1 &&
- healed_sinks[ARBITER_BRICK_INDEX]) {
- is_arbiter_the_only_sink = _gf_true;
- goto restore_time;
- }
-
ret = __afr_selfheal_truncate_sinks(
frame, this, fd, healed_sinks,
locked_replies[source].poststat.ia_size);
if (ret < 0)
goto unlock;
+ if (priv->arbiter_count &&
+ AFR_COUNT(healed_sinks, priv->child_count) == 1 &&
+ healed_sinks[ARBITER_BRICK_INDEX]) {
+ is_arbiter_the_only_sink = _gf_true;
+ goto restore_time;
+ }
ret = 0;
}
unlock:
diff --git a/xlators/cluster/afr/src/afr-self-heal-entry.c b/xlators/cluster/afr/src/afr-self-heal-entry.c
index 619558e94b7..64893f441e3 100644
--- a/xlators/cluster/afr/src/afr-self-heal-entry.c
+++ b/xlators/cluster/afr/src/afr-self-heal-entry.c
@@ -10,60 +10,176 @@
#include "afr.h"
#include "afr-self-heal.h"
-#include "byte-order.h"
+#include <glusterfs/byte-order.h>
#include "afr-transaction.h"
#include "afr-messages.h"
-#include "syncop-utils.h"
-#include "events.h"
+#include <glusterfs/syncop-utils.h>
+#include <glusterfs/events.h>
-static int
-afr_selfheal_entry_delete(xlator_t *this, inode_t *dir, const char *name,
- inode_t *inode, int child, struct afr_reply *replies)
+int
+afr_selfheal_entry_anon_inode(xlator_t *this, inode_t *dir, const char *name,
+ inode_t *inode, int child,
+ struct afr_reply *replies,
+ gf_boolean_t *anon_inode)
{
afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
xlator_t *subvol = NULL;
int ret = 0;
+ int i = 0;
+ char g[64] = {0};
+ unsigned char *lookup_success = NULL;
+ call_frame_t *frame = NULL;
+ loc_t loc2 = {
+ 0,
+ };
loc_t loc = {
0,
};
- char g[64];
priv = this->private;
-
subvol = priv->children[child];
+ lookup_success = alloca0(priv->child_count);
+ uuid_utoa_r(replies[child].poststat.ia_gfid, g);
+ loc.inode = inode_new(inode->table);
+ if (!loc.inode) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ if (replies[child].poststat.ia_type == IA_IFDIR) {
+ /* This directory may have a sub-directory hierarchy which may need to
+ * be preserved for subsequent heals. So unconditionally move the
+ * directory to the anonymous-inode directory. */
+ *anon_inode = _gf_true;
+ goto anon_inode;
+ }
+
+ frame = afr_frame_create(this, &ret);
+ if (!frame) {
+ ret = -ret;
+ goto out;
+ }
+ local = frame->local;
+ gf_uuid_copy(loc.gfid, replies[child].poststat.ia_gfid);
+ AFR_ONLIST(local->child_up, frame, afr_selfheal_discover_cbk, lookup, &loc,
+ NULL);
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->replies[i].op_ret == 0) {
+ lookup_success[i] = 1;
+ } else if (local->replies[i].op_errno != ENOENT &&
+ local->replies[i].op_errno != ESTALE) {
+ ret = -local->replies[i].op_errno;
+ }
+ }
+
+ if (priv->quorum_count) {
+ if (afr_has_quorum(lookup_success, this, NULL)) {
+ *anon_inode = _gf_true;
+ }
+ } else if (AFR_COUNT(lookup_success, priv->child_count) > 1) {
+ *anon_inode = _gf_true;
+ } else if (ret) {
+ goto out;
+ }
+
+anon_inode:
+ if (!*anon_inode) {
+ ret = 0;
+ goto out;
+ }
loc.parent = inode_ref(dir);
gf_uuid_copy(loc.pargfid, dir->gfid);
loc.name = name;
- loc.inode = inode_ref(inode);
- if (replies[child].valid && replies[child].op_ret == 0) {
- switch (replies[child].poststat.ia_type) {
- case IA_IFDIR:
- gf_msg(this->name, GF_LOG_WARNING, 0,
- AFR_MSG_EXPUNGING_FILE_OR_DIR,
- "expunging dir %s/%s (%s) on %s", uuid_utoa(dir->gfid),
- name, uuid_utoa_r(replies[child].poststat.ia_gfid, g),
- subvol->name);
- ret = syncop_rmdir(subvol, &loc, 1, NULL, NULL);
- break;
- default:
- gf_msg(this->name, GF_LOG_WARNING, 0,
- AFR_MSG_EXPUNGING_FILE_OR_DIR,
- "expunging file %s/%s (%s) on %s", uuid_utoa(dir->gfid),
- name, uuid_utoa_r(replies[child].poststat.ia_gfid, g),
- subvol->name);
- ret = syncop_unlink(subvol, &loc, NULL, NULL);
- break;
- }
+ ret = afr_anon_inode_create(this, child, &loc2.parent);
+ if (ret < 0)
+ goto out;
+
+ loc2.name = g;
+ ret = syncop_rename(subvol, &loc, &loc2, NULL, NULL);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_WARNING, -ret, AFR_MSG_EXPUNGING_FILE_OR_DIR,
+ "Rename to %s dir %s/%s (%s) on %s failed",
+ priv->anon_inode_name, uuid_utoa(dir->gfid), name, g,
+ subvol->name);
+ } else {
+ gf_msg(this->name, GF_LOG_WARNING, 0, AFR_MSG_EXPUNGING_FILE_OR_DIR,
+ "Rename to %s dir %s/%s (%s) on %s successful",
+ priv->anon_inode_name, uuid_utoa(dir->gfid), name, g,
+ subvol->name);
}
+out:
loc_wipe(&loc);
+ loc_wipe(&loc2);
+ if (frame) {
+ AFR_STACK_DESTROY(frame);
+ }
return ret;
}
int
+afr_selfheal_entry_delete(xlator_t *this, inode_t *dir, const char *name,
+ inode_t *inode, int child, struct afr_reply *replies)
+{
+ char g[64] = {0};
+ afr_private_t *priv = NULL;
+ xlator_t *subvol = NULL;
+ int ret = 0;
+ loc_t loc = {
+ 0,
+ };
+ gf_boolean_t anon_inode = _gf_false;
+
+ priv = this->private;
+ subvol = priv->children[child];
+
+ if ((!replies[child].valid) || (replies[child].op_ret < 0)) {
+ /*Nothing to do*/
+ ret = 0;
+ goto out;
+ }
+
+ if (priv->use_anon_inode) {
+ ret = afr_selfheal_entry_anon_inode(this, dir, name, inode, child,
+ replies, &anon_inode);
+ if (ret < 0 || anon_inode)
+ goto out;
+ }
+
+ loc.parent = inode_ref(dir);
+ loc.inode = inode_new(inode->table);
+ if (!loc.inode) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ loc.name = name;
+ switch (replies[child].poststat.ia_type) {
+ case IA_IFDIR:
+ gf_msg(this->name, GF_LOG_WARNING, 0, AFR_MSG_EXPUNGING_FILE_OR_DIR,
+ "expunging dir %s/%s (%s) on %s", uuid_utoa(dir->gfid), name,
+ uuid_utoa_r(replies[child].poststat.ia_gfid, g),
+ subvol->name);
+ ret = syncop_rmdir(subvol, &loc, 1, NULL, NULL);
+ break;
+ default:
+ gf_msg(this->name, GF_LOG_WARNING, 0, AFR_MSG_EXPUNGING_FILE_OR_DIR,
+ "expunging file %s/%s (%s) on %s", uuid_utoa(dir->gfid),
+ name, uuid_utoa_r(replies[child].poststat.ia_gfid, g),
+ subvol->name);
+ ret = syncop_unlink(subvol, &loc, NULL, NULL);
+ break;
+ }
+
+out:
+ loc_wipe(&loc);
+ return ret;
+}
+
+int
afr_selfheal_recreate_entry(call_frame_t *frame, int dst, int source,
unsigned char *sources, inode_t *dir,
const char *name, inode_t *inode,
@@ -76,6 +192,9 @@ afr_selfheal_recreate_entry(call_frame_t *frame, int dst, int source,
loc_t srcloc = {
0,
};
+ loc_t anonloc = {
+ 0,
+ };
xlator_t *this = frame->this;
afr_private_t *priv = NULL;
dict_t *xdata = NULL;
@@ -86,15 +205,18 @@ afr_selfheal_recreate_entry(call_frame_t *frame, int dst, int source,
0,
};
unsigned char *newentry = NULL;
+ char iatt_uuid_str[64] = {0};
+ char dir_uuid_str[64] = {0};
priv = this->private;
iatt = &replies[source].poststat;
+ uuid_utoa_r(iatt->ia_gfid, iatt_uuid_str);
if (iatt->ia_type == IA_INVAL || gf_uuid_is_null(iatt->ia_gfid)) {
gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_SELF_HEAL_FAILED,
"Invalid ia_type (%d) or gfid(%s). source brick=%d, "
"pargfid=%s, name=%s",
- iatt->ia_type, uuid_utoa(iatt->ia_gfid), source,
- uuid_utoa(dir->gfid), name);
+ iatt->ia_type, iatt_uuid_str, source,
+ uuid_utoa_r(dir->gfid, dir_uuid_str), name);
ret = -EINVAL;
goto out;
}
@@ -119,14 +241,24 @@ afr_selfheal_recreate_entry(call_frame_t *frame, int dst, int source,
srcloc.inode = inode_ref(inode);
gf_uuid_copy(srcloc.gfid, iatt->ia_gfid);
- if (iatt->ia_type != IA_IFDIR)
- ret = syncop_lookup(priv->children[dst], &srcloc, 0, 0, 0, 0);
- if (iatt->ia_type == IA_IFDIR || ret == -ENOENT || ret == -ESTALE) {
+ ret = syncop_lookup(priv->children[dst], &srcloc, 0, 0, 0, 0);
+ if (ret == -ENOENT || ret == -ESTALE) {
newentry[dst] = 1;
ret = afr_selfheal_newentry_mark(frame, this, inode, source, replies,
sources, newentry);
if (ret)
goto out;
+ } else if (ret == 0 && iatt->ia_type == IA_IFDIR && priv->use_anon_inode) {
+ // Try rename from hidden directory
+ ret = afr_anon_inode_create(this, dst, &anonloc.parent);
+ if (ret < 0)
+ goto out;
+ anonloc.inode = inode_ref(inode);
+ anonloc.name = iatt_uuid_str;
+ ret = syncop_rename(priv->children[dst], &anonloc, &loc, NULL, NULL);
+ if (ret == -ENOENT || ret == -ESTALE)
+ ret = -1; /*This sets 'mismatch' to true*/
+ goto out;
}
mode = st_mode_from_ia(iatt->ia_prot, iatt->ia_type);
@@ -149,7 +281,7 @@ afr_selfheal_recreate_entry(call_frame_t *frame, int dst, int source,
}
break;
default:
- ret = dict_set_int32(xdata, GLUSTERFS_INTERNAL_FOP_KEY, 1);
+ ret = dict_set_int32_sizen(xdata, GLUSTERFS_INTERNAL_FOP_KEY, 1);
if (ret)
goto out;
ret = syncop_mknod(
@@ -165,6 +297,7 @@ out:
GF_FREE(linkname);
loc_wipe(&loc);
loc_wipe(&srcloc);
+ loc_wipe(&anonloc);
return ret;
}
@@ -192,7 +325,7 @@ __afr_selfheal_heal_dirent(call_frame_t *frame, xlator_t *this, fd_t *fd,
if (replies[source].op_ret == 0) {
ret = afr_lookup_and_heal_gfid(this, fd->inode, name, inode, replies,
source, sources,
- &replies[source].poststat.ia_gfid);
+ &replies[source].poststat.ia_gfid, NULL);
if (ret)
return ret;
}
@@ -246,6 +379,19 @@ afr_selfheal_detect_gfid_and_type_mismatch(xlator_t *this,
if (replies[i].op_ret != 0)
continue;
+ if (gf_uuid_is_null(replies[i].poststat.ia_gfid))
+ continue;
+
+ if (replies[i].poststat.ia_type == IA_INVAL)
+ continue;
+
+ if (ia_type == IA_INVAL || gf_uuid_is_null(gfid)) {
+ src_idx = i;
+ ia_type = replies[src_idx].poststat.ia_type;
+ gfid = &replies[src_idx].poststat.ia_gfid;
+ continue;
+ }
+
if (gf_uuid_compare(gfid, replies[i].poststat.ia_gfid) &&
(ia_type == replies[i].poststat.ia_type)) {
ret = afr_gfid_split_brain_source(this, replies, inode, pargfid,
@@ -269,11 +415,12 @@ afr_selfheal_detect_gfid_and_type_mismatch(xlator_t *this,
gf_inode_type_to_str(replies[src_idx].poststat.ia_type),
priv->children[src_idx]->name);
gf_event(EVENT_AFR_SPLIT_BRAIN,
+ "client-pid=%d;"
"subvol=%s;type=file;"
"file=<gfid:%s>/%s>;count=2;child-%d=%s;type-"
"%d=%s;child-%d=%s;type-%d=%s",
- this->name, uuid_utoa(pargfid), bname, i,
- priv->children[i]->name, i,
+ this->ctx->cmd_args.client_pid, this->name,
+ uuid_utoa(pargfid), bname, i, priv->children[i]->name, i,
gf_inode_type_to_str(replies[i].poststat.ia_type), src_idx,
priv->children[src_idx]->name, src_idx,
gf_inode_type_to_str(replies[src_idx].poststat.ia_type));
@@ -319,7 +466,7 @@ __afr_selfheal_merge_dirent(call_frame_t *frame, xlator_t *this, fd_t *fd,
ret = afr_lookup_and_heal_gfid(this, fd->inode, name, inode, replies,
source, sources,
- &replies[source].poststat.ia_gfid);
+ &replies[source].poststat.ia_gfid, NULL);
if (ret)
return ret;
@@ -465,6 +612,7 @@ __afr_selfheal_entry_finalize_source(xlator_t *this, unsigned char *sources,
afr_private_t *priv = NULL;
int source = -1;
int sources_count = 0;
+ int i = 0;
priv = this->private;
@@ -478,6 +626,20 @@ __afr_selfheal_entry_finalize_source(xlator_t *this, unsigned char *sources,
}
source = afr_choose_source_by_policy(priv, sources, AFR_ENTRY_TRANSACTION);
+
+ /*If the selected source does not blame any other brick, then mark
+ * everything as sink to trigger conservative merge.
+ */
+ if (source != -1 && !AFR_COUNT(healed_sinks, priv->child_count)) {
+ for (i = 0; i < priv->child_count; i++) {
+ if (locked_on[i]) {
+ sources[i] = 0;
+ healed_sinks[i] = 1;
+ }
+ }
+ return -1;
+ }
+
return source;
}
@@ -548,10 +710,15 @@ afr_selfheal_entry_dirent(call_frame_t *frame, xlator_t *this, fd_t *fd,
priv = this->private;
+ if (afr_is_private_directory(priv, fd->inode->gfid, name,
+ GF_CLIENT_PID_SELF_HEALD)) {
+ return 0;
+ }
+
xattr = dict_new();
if (!xattr)
return -ENOMEM;
- ret = dict_set_int32(xattr, GF_GFIDLESS_LOOKUP, 1);
+ ret = dict_set_int32_sizen(xattr, GF_GFIDLESS_LOOKUP, 1);
if (ret) {
dict_unref(xattr);
return -1;
@@ -568,7 +735,7 @@ afr_selfheal_entry_dirent(call_frame_t *frame, xlator_t *this, fd_t *fd,
ret = afr_selfheal_entrylk(frame, this, fd->inode, this->name, NULL,
locked_on);
{
- if (ret < AFR_SH_MIN_PARTICIPANTS) {
+ if (ret < priv->child_count) {
gf_msg_debug(this->name, 0,
"%s: Skipping "
"entry self-heal as only %d sub-volumes "
@@ -596,7 +763,7 @@ afr_selfheal_entry_dirent(call_frame_t *frame, xlator_t *this, fd_t *fd,
replies);
if ((ret == 0) && (priv->esh_granular) && parent_idx_inode) {
- ret = afr_shd_index_purge(subvol, parent_idx_inode, name,
+ ret = afr_shd_entry_purge(subvol, parent_idx_inode, name,
inode->ia_type);
/* Why is ret force-set to 0? We do not care about
* index purge failing for full heal as it is quite
@@ -726,10 +893,6 @@ afr_selfheal_entry_do_subvol(call_frame_t *frame, xlator_t *this, fd_t *fd,
if (!strcmp(entry->d_name, ".") || !strcmp(entry->d_name, ".."))
continue;
- if (__is_root_gfid(fd->inode->gfid) &&
- !strcmp(entry->d_name, GF_REPLICATE_TRASH_DIR))
- continue;
-
ret = afr_selfheal_entry_dirent(iter_frame, this, fd, entry->d_name,
loc.inode, subvol,
local->need_full_crawl);
@@ -792,7 +955,7 @@ afr_selfheal_entry_granular_dirent(xlator_t *subvol, gf_dirent_t *entry,
/* The name indices under the pgfid index dir are guaranteed
* to be regular files. Hence the hardcoding.
*/
- afr_shd_index_purge(subvol, parent->inode, entry->d_name, IA_IFREG);
+ afr_shd_entry_purge(subvol, parent->inode, entry->d_name, IA_IFREG);
ret = 0;
goto out;
}
@@ -831,6 +994,8 @@ afr_selfheal_entry_granular(call_frame_t *frame, xlator_t *this, fd_t *fd,
subvol = priv->children[subvol_idx];
args.frame = afr_copy_frame(frame);
+ if (!args.frame)
+ goto out;
args.xl = this;
/* args.heal_fd represents the fd associated with the original directory
* on which entry heal is being attempted.
@@ -849,9 +1014,10 @@ afr_selfheal_entry_granular(call_frame_t *frame, xlator_t *this, fd_t *fd,
* do not treat heal as failure.
*/
if (is_src)
- return -errno;
+ ret = -errno;
else
- return 0;
+ ret = 0;
+ goto out;
}
ret = syncop_dir_scan(subvol, &loc, GF_CLIENT_PID_SELF_HEALD, &args,
@@ -861,7 +1027,9 @@ afr_selfheal_entry_granular(call_frame_t *frame, xlator_t *this, fd_t *fd,
if (args.mismatch == _gf_true)
ret = -1;
-
+out:
+ if (args.frame)
+ AFR_STACK_DESTROY(args.frame);
return ret;
}
@@ -957,7 +1125,7 @@ __afr_selfheal_entry(call_frame_t *frame, xlator_t *this, fd_t *fd,
ret = afr_selfheal_entrylk(frame, this, fd->inode, this->name, NULL,
data_lock);
{
- if (ret < AFR_SH_MIN_PARTICIPANTS) {
+ if (ret < priv->child_count) {
gf_msg_debug(this->name, 0,
"%s: Skipping "
"entry self-heal as only %d sub-volumes could "
@@ -1013,6 +1181,8 @@ unlock:
goto postop_unlock;
}
+ afr_selfheal_restore_time(frame, this, fd->inode, source, healed_sinks,
+ locked_replies);
ret = afr_selfheal_undo_pending(
frame, this, fd->inode, sources, sinks, healed_sinks, undid_pending,
AFR_ENTRY_TRANSACTION, locked_replies, postop_lock);
@@ -1079,7 +1249,7 @@ afr_selfheal_entry(call_frame_t *frame, xlator_t *this, inode_t *inode)
ret = afr_selfheal_tie_breaker_entrylk(frame, this, inode, priv->sh_domain,
NULL, locked_on);
{
- if (ret < AFR_SH_MIN_PARTICIPANTS) {
+ if (ret < priv->child_count) {
gf_msg_debug(this->name, 0,
"%s: Skipping "
"entry self-heal as only %d sub-volumes could "
diff --git a/xlators/cluster/afr/src/afr-self-heal-metadata.c b/xlators/cluster/afr/src/afr-self-heal-metadata.c
index ea2a7bfd52f..03f43bad16e 100644
--- a/xlators/cluster/afr/src/afr-self-heal-metadata.c
+++ b/xlators/cluster/afr/src/afr-self-heal-metadata.c
@@ -10,9 +10,9 @@
#include "afr.h"
#include "afr-self-heal.h"
-#include "byte-order.h"
+#include <glusterfs/byte-order.h>
#include "protocol-common.h"
-#include "events.h"
+#include <glusterfs/events.h>
#define AFR_HEAL_ATTR (GF_SET_ATTR_UID | GF_SET_ATTR_GID | GF_SET_ATTR_MODE)
@@ -190,6 +190,59 @@ out:
return ret;
}
+static int
+__afr_selfheal_metadata_mark_pending_xattrs(call_frame_t *frame, xlator_t *this,
+ inode_t *inode,
+ struct afr_reply *replies,
+ unsigned char *sources)
+{
+ int ret = 0;
+ int i = 0;
+ int m_idx = 0;
+ afr_private_t *priv = NULL;
+ int raw[AFR_NUM_CHANGE_LOGS] = {0};
+ dict_t *xattr = NULL;
+
+ priv = this->private;
+ m_idx = afr_index_for_transaction_type(AFR_METADATA_TRANSACTION);
+ raw[m_idx] = 1;
+
+ xattr = dict_new();
+ if (!xattr)
+ return -ENOMEM;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (sources[i])
+ continue;
+ ret = dict_set_static_bin(xattr, priv->pending_key[i], raw,
+ sizeof(int) * AFR_NUM_CHANGE_LOGS);
+ if (ret) {
+ ret = -1;
+ goto out;
+ }
+ }
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!sources[i])
+ continue;
+ ret = afr_selfheal_post_op(frame, this, inode, i, xattr, NULL);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_INFO, -ret, AFR_MSG_SELF_HEAL_INFO,
+ "Failed to set pending metadata xattr on child %d for %s", i,
+ uuid_utoa(inode->gfid));
+ goto out;
+ }
+ }
+
+ afr_replies_wipe(replies, priv->child_count);
+ ret = afr_selfheal_unlocked_discover(frame, inode, inode->gfid, replies);
+
+out:
+ if (xattr)
+ dict_unref(xattr);
+ return ret;
+}
+
/*
* Look for mismatching uid/gid or mode or user xattrs even if
* AFR xattrs don't say so, and pick one arbitrarily as winner. */
@@ -210,6 +263,7 @@ __afr_selfheal_metadata_finalize_source(call_frame_t *frame, xlator_t *this,
};
int source = -1;
int sources_count = 0;
+ int ret = 0;
priv = this->private;
@@ -242,9 +296,11 @@ __afr_selfheal_metadata_finalize_source(call_frame_t *frame, xlator_t *this,
if (!priv->metadata_splitbrain_forced_heal) {
gf_event(EVENT_AFR_SPLIT_BRAIN,
+ "client-pid=%d;"
"subvol=%s;"
"type=metadata;file=%s",
- this->name, uuid_utoa(inode->gfid));
+ this->ctx->cmd_args.client_pid, this->name,
+ uuid_utoa(inode->gfid));
return -EIO;
}
@@ -298,7 +354,13 @@ __afr_selfheal_metadata_finalize_source(call_frame_t *frame, xlator_t *this,
healed_sinks[i] = 1;
}
}
-
+ if ((sources_count == priv->child_count) && (source > -1) &&
+ (AFR_COUNT(healed_sinks, priv->child_count) != 0)) {
+ ret = __afr_selfheal_metadata_mark_pending_xattrs(frame, this, inode,
+ replies, sources);
+ if (ret < 0)
+ return ret;
+ }
out:
afr_mark_active_sinks(this, sources, locked_on, healed_sinks);
return source;
@@ -396,7 +458,7 @@ afr_selfheal_metadata(call_frame_t *frame, xlator_t *this, inode_t *inode)
ret = afr_selfheal_inodelk(frame, this, inode, this->name, LLONG_MAX - 1, 0,
data_lock);
{
- if (ret < AFR_SH_MIN_PARTICIPANTS) {
+ if (ret < priv->child_count) {
ret = -ENOTCONN;
goto unlock;
}
@@ -419,12 +481,8 @@ afr_selfheal_metadata(call_frame_t *frame, xlator_t *this, inode_t *inode)
if (ret)
goto unlock;
- /* Restore atime/mtime for files that don't need data heal as
- * restoring timestamps happens only as a part of data-heal.
- */
- if (!IA_ISREG(locked_replies[source].poststat.ia_type))
- afr_selfheal_restore_time(frame, this, inode, source, healed_sinks,
- locked_replies);
+ afr_selfheal_restore_time(frame, this, inode, source, healed_sinks,
+ locked_replies);
ret = afr_selfheal_undo_pending(
frame, this, inode, sources, sinks, healed_sinks, undid_pending,
diff --git a/xlators/cluster/afr/src/afr-self-heal-name.c b/xlators/cluster/afr/src/afr-self-heal-name.c
index 39aacee6ecf..834aac86d48 100644
--- a/xlators/cluster/afr/src/afr-self-heal-name.c
+++ b/xlators/cluster/afr/src/afr-self-heal-name.c
@@ -8,7 +8,7 @@
cases as published by the Free Software Foundation.
*/
-#include "events.h"
+#include <glusterfs/events.h>
#include "afr.h"
#include "afr-self-heal.h"
#include "afr-messages.h"
@@ -18,7 +18,8 @@ __afr_selfheal_assign_gfid(xlator_t *this, inode_t *parent, uuid_t pargfid,
const char *bname, inode_t *inode,
struct afr_reply *replies, void *gfid,
unsigned char *locked_on, int source,
- unsigned char *sources, gf_boolean_t is_gfid_absent)
+ unsigned char *sources, gf_boolean_t is_gfid_absent,
+ int *gfid_idx)
{
int ret = 0;
int up_count = 0;
@@ -46,8 +47,8 @@ __afr_selfheal_assign_gfid(xlator_t *this, inode_t *parent, uuid_t pargfid,
}
}
- afr_lookup_and_heal_gfid(this, parent, bname, inode, replies, source,
- sources, gfid);
+ ret = afr_lookup_and_heal_gfid(this, parent, bname, inode, replies, source,
+ sources, gfid, gfid_idx);
out:
return ret;
@@ -97,21 +98,12 @@ __afr_selfheal_name_expunge(xlator_t *this, inode_t *parent, uuid_t pargfid,
const char *bname, inode_t *inode,
struct afr_reply *replies)
{
- loc_t loc = {
- 0,
- };
int i = 0;
afr_private_t *priv = NULL;
- char g[64];
int ret = 0;
priv = this->private;
- loc.parent = inode_ref(parent);
- gf_uuid_copy(loc.pargfid, pargfid);
- loc.name = bname;
- loc.inode = inode_ref(inode);
-
for (i = 0; i < priv->child_count; i++) {
if (!replies[i].valid)
continue;
@@ -119,62 +111,13 @@ __afr_selfheal_name_expunge(xlator_t *this, inode_t *parent, uuid_t pargfid,
if (replies[i].op_ret)
continue;
- switch (replies[i].poststat.ia_type) {
- case IA_IFDIR:
- gf_msg(this->name, GF_LOG_WARNING, 0,
- AFR_MSG_EXPUNGING_FILE_OR_DIR,
- "expunging dir %s/%s (%s) on %s", uuid_utoa(pargfid),
- bname, uuid_utoa_r(replies[i].poststat.ia_gfid, g),
- priv->children[i]->name);
-
- ret |= syncop_rmdir(priv->children[i], &loc, 1, NULL, NULL);
- break;
- default:
- gf_msg(this->name, GF_LOG_WARNING, 0,
- AFR_MSG_EXPUNGING_FILE_OR_DIR,
- "expunging file %s/%s (%s) on %s", uuid_utoa(pargfid),
- bname, uuid_utoa_r(replies[i].poststat.ia_gfid, g),
- priv->children[i]->name);
-
- ret |= syncop_unlink(priv->children[i], &loc, NULL, NULL);
- break;
- }
+ ret |= afr_selfheal_entry_delete(this, parent, bname, inode, i,
+ replies);
}
- loc_wipe(&loc);
-
return ret;
}
-/* This function is to be called after ensuring that there is no gfid mismatch
- * for the inode across multiple sources
- */
-static int
-afr_selfheal_gfid_idx_get(xlator_t *this, struct afr_reply *replies,
- unsigned char *sources)
-{
- int i = 0;
- int gfid_idx = -1;
- afr_private_t *priv = NULL;
-
- priv = this->private;
-
- for (i = 0; i < priv->child_count; i++) {
- if (!replies[i].valid || replies[i].op_ret != 0)
- continue;
-
- if (!sources[i])
- continue;
-
- if (gf_uuid_is_null(replies[i].poststat.ia_gfid))
- continue;
-
- gfid_idx = i;
- break;
- }
- return gfid_idx;
-}
-
static gf_boolean_t
afr_selfheal_name_need_heal_check(xlator_t *this, struct afr_reply *replies)
{
@@ -250,13 +193,14 @@ afr_selfheal_name_type_mismatch_check(xlator_t *this, struct afr_reply *replies,
gf_inode_type_to_str(inode_type),
priv->children[type_idx]->name);
gf_event(EVENT_AFR_SPLIT_BRAIN,
+ "client-pid=%d;"
"subvol=%s;type=file;"
"file=<gfid:%s>/%s;count=2;"
"child-%d=%s;type-%d=%s;child-%d=%s;"
"type-%d=%s",
- this->name, uuid_utoa(pargfid), bname, i,
- priv->children[i]->name, i,
- gf_inode_type_to_str(inode_type1), type_idx,
+ this->ctx->cmd_args.client_pid, this->name,
+ uuid_utoa(pargfid), bname, i, priv->children[i]->name,
+ i, gf_inode_type_to_str(inode_type1), type_idx,
priv->children[type_idx]->name, type_idx,
gf_inode_type_to_str(inode_type));
return -EIO;
@@ -305,9 +249,8 @@ afr_selfheal_name_gfid_mismatch_check(xlator_t *this, struct afr_reply *replies,
bname, gfid_idx_iter, i,
locked_on, gfid_idx, xdata);
if (!ret && *gfid_idx >= 0) {
- ret = dict_set_str(xdata, "gfid-heal-msg",
- "GFID split-brain "
- "resolved");
+ ret = dict_set_sizen_str_sizen(xdata, "gfid-heal-msg",
+ "GFID split-brain resolved");
if (ret)
gf_msg(this->name, GF_LOG_ERROR, 0,
AFR_MSG_DICT_SET_FAILED,
@@ -400,21 +343,18 @@ __afr_selfheal_name_do(call_frame_t *frame, xlator_t *this, inode_t *parent,
gfid = gfid_req;
} else {
gfid = &replies[gfid_idx].poststat.ia_gfid;
+ if (source == -1)
+ /* Either entry split-brain or dirty xattrs are present on parent.*/
+ source = gfid_idx;
}
is_gfid_absent = (gfid_idx == -1) ? _gf_true : _gf_false;
ret = __afr_selfheal_assign_gfid(this, parent, pargfid, bname, inode,
replies, gfid, locked_on, source, sources,
- is_gfid_absent);
- if (ret)
+ is_gfid_absent, &gfid_idx);
+ if (ret || (gfid_idx < 0))
return ret;
- if (gfid_idx == -1) {
- gfid_idx = afr_selfheal_gfid_idx_get(this, replies, sources);
- if (gfid_idx == -1)
- return -1;
- }
-
ret = __afr_selfheal_name_impunge(frame, this, parent, pargfid, bname,
inode, replies, gfid_idx);
if (ret == -EIO)
@@ -527,7 +467,7 @@ afr_selfheal_name_do(call_frame_t *frame, xlator_t *this, inode_t *parent,
if (!xattr)
return -ENOMEM;
- ret = dict_set_int32(xattr, GF_GFIDLESS_LOOKUP, 1);
+ ret = dict_set_int32_sizen(xattr, GF_GFIDLESS_LOOKUP, 1);
if (ret) {
dict_unref(xattr);
return -1;
@@ -545,7 +485,7 @@ afr_selfheal_name_do(call_frame_t *frame, xlator_t *this, inode_t *parent,
ret = afr_selfheal_entrylk(frame, this, parent, this->name, bname,
locked_on);
{
- if (ret < AFR_SH_MIN_PARTICIPANTS) {
+ if (ret < priv->child_count) {
ret = -ENOTCONN;
goto unlock;
}
@@ -591,13 +531,15 @@ afr_selfheal_name_unlocked_inspect(call_frame_t *frame, xlator_t *this,
struct afr_reply *replies = NULL;
inode_t *inode = NULL;
int first_idx = -1;
+ afr_local_t *local = NULL;
priv = this->private;
+ local = frame->local;
replies = alloca0(sizeof(*replies) * priv->child_count);
inode = afr_selfheal_unlocked_lookup_on(frame, parent, bname, replies,
- priv->child_up, NULL);
+ local->child_up, NULL);
if (!inode)
return -ENOMEM;
diff --git a/xlators/cluster/afr/src/afr-self-heal.h b/xlators/cluster/afr/src/afr-self-heal.h
index 9c7418c7169..48e6dbcfb18 100644
--- a/xlators/cluster/afr/src/afr-self-heal.h
+++ b/xlators/cluster/afr/src/afr-self-heal.h
@@ -11,8 +11,6 @@
#ifndef _AFR_SELFHEAL_H
#define _AFR_SELFHEAL_H
-#define AFR_SH_MIN_PARTICIPANTS 2
-
/* Perform fop on all UP subvolumes and wait for all callbacks to return */
#define AFR_ONALL(frame, rfn, fop, args...) \
@@ -20,9 +18,8 @@
afr_local_t *__local = frame->local; \
afr_private_t *__priv = frame->this->private; \
int __i = 0, __count = 0; \
- unsigned char *__child_up = NULL; \
+ unsigned char *__child_up = alloca(__priv->child_count); \
\
- __child_up = alloca0(__priv->child_count); \
memcpy(__child_up, __priv->child_up, \
sizeof(*__child_up) * __priv->child_count); \
__count = AFR_COUNT(__child_up, __priv->child_count); \
@@ -48,13 +45,16 @@
afr_local_t *__local = frame->local; \
afr_private_t *__priv = frame->this->private; \
int __i = 0; \
- int __count = AFR_COUNT(list, __priv->child_count); \
+ int __count = 0; \
+ unsigned char *__list = alloca(__priv->child_count); \
\
+ memcpy(__list, list, sizeof(*__list) * __priv->child_count); \
+ __count = AFR_COUNT(__list, __priv->child_count); \
__local->barrier.waitfor = __count; \
afr_local_replies_wipe(__local, __priv); \
\
for (__i = 0; __i < __priv->child_count; __i++) { \
- if (!list[__i]) \
+ if (!__list[__i]) \
continue; \
STACK_WIND_COOKIE(frame, rfn, (void *)(long)__i, \
__priv->children[__i], \
@@ -83,9 +83,9 @@
#define ALLOC_MATRIX(n, type) \
({ \
- type **__ptr = NULL; \
int __i; \
- __ptr = alloca0(n * sizeof(type *)); \
+ type **__ptr = alloca(n * sizeof(type *)); \
+ \
for (__i = 0; __i < n; __i++) \
__ptr[__i] = alloca0(n * sizeof(type)); \
__ptr; \
@@ -97,6 +97,27 @@
#define SBRAIN_HEAL_NO_GO_MSG \
"Failed to obtain replies from all bricks of " \
"the replica (are they up?). Cannot resolve split-brain."
+#define SFILE_NOT_IN_SPLIT_BRAIN "File not in split-brain"
+#define SNO_BIGGER_FILE "No bigger file"
+#define SNO_DIFF_IN_MTIME "No difference in mtime"
+#define SUSE_SOURCE_BRICK_TO_HEAL \
+ "Use source-brick option to heal metadata" \
+ " split-brain"
+#define SINVALID_BRICK_NAME "Invalid brick name"
+#define SBRICK_IS_NOT_UP "Brick is not up"
+#define SBRICK_NOT_CONNECTED "Brick is not connected"
+#define SLESS_THAN2_BRICKS_in_REP "< 2 bricks in replica are up"
+#define SBRICK_IS_REMOTE "Brick is remote"
+#define SSTARTED_SELF_HEAL "Started self-heal"
+#define SOP_NOT_SUPPORTED "Operation Not Supported"
+#define SFILE_NOT_UNDER_DATA \
+ "The file is not under data or metadata " \
+ "split-brain"
+#define SFILE_NOT_IN_SPLIT_BRAIN "File not in split-brain"
+#define SALL_BRICKS_UP_TO_RESOLVE \
+ "All the bricks should be up to resolve the" \
+ " gfid split brain"
+#define SERROR_GETTING_SRC_BRICK "Error getting the source brick"
int
afr_selfheal(xlator_t *this, uuid_t gfid);
@@ -119,7 +140,7 @@ afr_selfheal_entry(call_frame_t *frame, xlator_t *this, inode_t *inode);
int
afr_lookup_and_heal_gfid(xlator_t *this, inode_t *parent, const char *name,
inode_t *inode, struct afr_reply *replies, int source,
- unsigned char *sources, void *gfid);
+ unsigned char *sources, void *gfid, int *gfid_idx);
int
afr_selfheal_inodelk(call_frame_t *frame, xlator_t *this, inode_t *inode,
@@ -166,7 +187,7 @@ afr_selfheal_unlocked_discover(call_frame_t *frame, inode_t *inode, uuid_t gfid,
int
afr_selfheal_unlocked_discover_on(call_frame_t *frame, inode_t *inode,
uuid_t gfid, struct afr_reply *replies,
- unsigned char *discover_on);
+ unsigned char *discover_on, dict_t *dict);
inode_t *
afr_selfheal_unlocked_lookup_on(call_frame_t *frame, inode_t *parent,
const char *name, struct afr_reply *replies,
@@ -305,7 +326,8 @@ int
afr_selfheal_unlocked_inspect(call_frame_t *frame, xlator_t *this, uuid_t gfid,
inode_t **link_inode, gf_boolean_t *data_selfheal,
gf_boolean_t *metadata_selfheal,
- gf_boolean_t *entry_selfheal);
+ gf_boolean_t *entry_selfheal,
+ struct afr_reply *replies);
int
afr_selfheal_do(call_frame_t *frame, xlator_t *this, uuid_t gfid);
@@ -347,4 +369,9 @@ gf_boolean_t
afr_is_file_empty_on_all_children(afr_private_t *priv,
struct afr_reply *replies);
+int
+afr_selfheal_entry_delete(xlator_t *this, inode_t *dir, const char *name,
+ inode_t *inode, int child, struct afr_reply *replies);
+int
+afr_anon_inode_create(xlator_t *this, int child, inode_t **linked_inode);
#endif /* !_AFR_SELFHEAL_H */
diff --git a/xlators/cluster/afr/src/afr-self-heald.c b/xlators/cluster/afr/src/afr-self-heald.c
index e2de77a9c45..109fd4b7421 100644
--- a/xlators/cluster/afr/src/afr-self-heald.c
+++ b/xlators/cluster/afr/src/afr-self-heald.c
@@ -12,11 +12,10 @@
#include "afr-self-heal.h"
#include "afr-self-heald.h"
#include "protocol-common.h"
-#include "syncop-utils.h"
+#include <glusterfs/syncop-utils.h>
#include "afr-messages.h"
-#include "byte-order.h"
+#include <glusterfs/byte-order.h>
-#define SHD_INODE_LRU_LIMIT 2048
#define AFR_EH_SPLIT_BRAIN_LIMIT 1024
#define AFR_STATISTICS_HISTORY_SIZE 50
@@ -95,7 +94,7 @@ __afr_shd_healer_wait(struct subvol_healer *healer)
priv = healer->this->private;
disabled_loop:
- wait_till.tv_sec = time(NULL) + priv->shd.timeout;
+ wait_till.tv_sec = gf_time() + priv->shd.timeout;
while (!healer->rerun) {
ret = pthread_cond_timedwait(&healer->cond, &healer->mutex, &wait_till);
@@ -223,7 +222,7 @@ out:
}
int
-afr_shd_index_purge(xlator_t *subvol, inode_t *inode, char *name,
+afr_shd_entry_purge(xlator_t *subvol, inode_t *inode, char *name,
ia_type_t type)
{
int ret = 0;
@@ -372,8 +371,9 @@ afr_shd_sweep_prepare(struct subvol_healer *healer)
event->split_brain_count = 0;
event->heal_failed_count = 0;
- time(&event->start_time);
+ event->start_time = gf_time();
event->end_time = 0;
+ _mask_cancellation();
}
void
@@ -386,8 +386,8 @@ afr_shd_sweep_done(struct subvol_healer *healer)
event = &healer->crawl_event;
shd = &(((afr_private_t *)healer->this->private)->shd);
- time(&event->end_time);
- history = memdup(event, sizeof(*event));
+ event->end_time = gf_time();
+ history = gf_memdup(event, sizeof(*event));
event->start_time = 0;
if (!history)
@@ -395,6 +395,7 @@ afr_shd_sweep_done(struct subvol_healer *healer)
if (eh_save_history(shd->statistics[healer->subvol], history) < 0)
GF_FREE(history);
+ _unmask_cancellation();
}
int
@@ -423,7 +424,7 @@ afr_shd_index_heal(xlator_t *subvol, gf_dirent_t *entry, loc_t *parent,
ret = afr_shd_selfheal(healer, healer->subvol, gfid);
if (ret == -ENOENT || ret == -ESTALE)
- afr_shd_index_purge(subvol, parent->inode, entry->d_name, val);
+ afr_shd_entry_purge(subvol, parent->inode, entry->d_name, val);
if (ret == 2)
/* If bricks crashed in pre-op after creating indices/xattrop
@@ -465,7 +466,7 @@ afr_shd_index_sweep(struct subvol_healer *healer, char *vgfid)
}
xdata = dict_new();
- if (!xdata || dict_set_int32(xdata, "get-gfid-type", 1)) {
+ if (!xdata || dict_set_int32_sizen(xdata, "get-gfid-type", 1)) {
ret = -ENOMEM;
goto out;
}
@@ -523,6 +524,11 @@ afr_shd_full_heal(xlator_t *subvol, gf_dirent_t *entry, loc_t *parent,
afr_private_t *priv = NULL;
priv = this->private;
+
+ if (this->cleanup_starting) {
+ return -ENOTCONN;
+ }
+
if (!priv->shd.enabled)
return -EBUSY;
@@ -591,7 +597,9 @@ _afr_shd_ta_get_xattrs(xlator_t *this, loc_t *loc, dict_t **xdata)
{
afr_private_t *priv = NULL;
dict_t *xattr = NULL;
- int *raw = NULL;
+ int raw[AFR_NUM_CHANGE_LOGS] = {
+ 0,
+ };
int ret = -1;
int i = 0;
@@ -603,18 +611,11 @@ _afr_shd_ta_get_xattrs(xlator_t *this, loc_t *loc, dict_t **xdata)
"Failed to create dict.");
goto out;
}
-
for (i = 0; i < priv->child_count; i++) {
- raw = GF_CALLOC(AFR_NUM_CHANGE_LOGS, sizeof(int), gf_afr_mt_int32_t);
- if (!raw)
- goto out;
-
- ret = dict_set_bin(xattr, priv->pending_key[i], raw,
- AFR_NUM_CHANGE_LOGS * sizeof(int));
- if (ret) {
- GF_FREE(raw);
+ ret = dict_set_static_bin(xattr, priv->pending_key[i], &raw,
+ AFR_NUM_CHANGE_LOGS * sizeof(int));
+ if (ret)
goto out;
- }
}
ret = syncop_xattrop(priv->children[THIN_ARBITER_BRICK_INDEX], loc,
@@ -641,6 +642,7 @@ afr_shd_ta_get_xattrs(xlator_t *this, loc_t *loc, struct subvol_healer *healer,
if (afr_shd_fill_ta_loc(this, loc)) {
gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_THIN_ARB,
"Failed to populate thin-arbiter loc for: %s.", loc->name);
+ ret = -1;
goto out;
}
@@ -722,9 +724,9 @@ afr_shd_ta_unset_xattrs(xlator_t *this, loc_t *loc, dict_t **xdata, int healer)
}
ret = dict_set_bin(xattr, priv->pending_key[i], raw,
- AFR_NUM_CHANGE_LOGS * sizeof (int));
+ AFR_NUM_CHANGE_LOGS * sizeof(int));
if (ret) {
- GF_FREE (raw);
+ GF_FREE(raw);
goto out;
}
@@ -799,6 +801,218 @@ afr_bricks_available_for_heal(afr_private_t *priv)
return _gf_true;
}
+/*
+ * Report whether the thin-arbiter xattrs show a pending heal for any child.
+ *
+ * Builds the thin-arbiter loc, fetches the xattrs through
+ * _afr_shd_ta_get_xattrs() and returns _gf_true as soon as one child has a
+ * pending xattr in the returned dict. If the loc cannot be populated or the
+ * xattrs cannot be fetched, healer->rerun is set and _gf_false is returned.
+ */
+static gf_boolean_t
+afr_shd_ta_needs_heal(xlator_t *this, struct subvol_healer *healer)
+{
+    afr_private_t *priv = this->private;
+    gf_boolean_t pending = _gf_false;
+    dict_t *xdata = NULL;
+    loc_t loc = {
+        0,
+    };
+    int child = 0;
+    int ret = -1;
+
+    ret = afr_shd_fill_ta_loc(this, &loc);
+    if (ret) {
+        gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_THIN_ARB,
+               "Failed to populate thin-arbiter loc for: %s.", loc.name);
+        healer->rerun = 1;
+    } else if (_afr_shd_ta_get_xattrs(this, &loc, &xdata)) {
+        healer->rerun = 1;
+    } else {
+        /* Stop at the first child that carries a pending xattr. */
+        for (child = 0; !pending && child < priv->child_count; child++) {
+            if (afr_ta_dict_contains_pending_xattr(xdata, priv, child))
+                pending = _gf_true;
+        }
+    }
+
+    /* Single exit: release whatever was acquired above. */
+    if (xdata)
+        dict_unref(xdata);
+    loc_wipe(&loc);
+
+    return pending;
+}
+
+
+/*
+ * Per-entry callback that afr_cleanup_anon_inode_dir() hands to
+ * syncop_mt_dir_scan(). 'entry' is a name found in the scanned directory on
+ * 'subvol'; the name is parsed as a gfid and the entry is expunged only when
+ * that is safe:
+ *
+ *  - nothing is done unless every child is up, and the entry is skipped as
+ *    soon as a lookup fails with anything other than ENOENT/ESTALE, because
+ *    the view of the replicas is then incomplete;
+ *  - if the gfid exists on exactly one child, or it is a non-directory with
+ *    more than one link on the healer's own subvolume, the entry is purged
+ *    from 'subvol' only;
+ *  - if the gfid exists on several children, the entry is looked up by name
+ *    under 'parent' on all of them and purged everywhere, unless some child
+ *    holds the gfid without this named entry.
+ *
+ * 'data' is the struct subvol_healer driving the scan.
+ *
+ * Returns 0 when the entry was handled or deliberately skipped, otherwise a
+ * negative error value. ENOENT/ESTALE from a purge count as success.
+ */
+static int
+afr_shd_anon_inode_cleaner(xlator_t *subvol, gf_dirent_t *entry, loc_t *parent,
+                           void *data)
+{
+    struct subvol_healer *healer = data;
+    afr_private_t *priv = healer->this->private;
+    call_frame_t *frame = NULL;
+    afr_local_t *local = NULL;
+    int ret = 0;
+    loc_t loc = {0};
+    int count = 0;
+    int i = 0;
+    int op_errno = 0;
+    struct iatt *iatt = NULL;
+    gf_boolean_t multiple_links = _gf_false;
+    unsigned char *gfid_present = alloca0(priv->child_count);
+    unsigned char *entry_present = alloca0(priv->child_count);
+    char *type = "file";
+
+    frame = afr_frame_create(healer->this, &ret);
+    if (!frame) {
+        ret = -ret;
+        goto out;
+    }
+    local = frame->local;
+    if (AFR_COUNT(local->child_up, priv->child_count) != priv->child_count) {
+        gf_msg_debug(healer->this->name, 0,
+                     "Not all bricks are up. Skipping "
+                     "cleanup of %s on %s",
+                     entry->d_name, subvol->name);
+        ret = 0;
+        goto out;
+    }
+
+    loc.inode = inode_new(parent->inode->table);
+    if (!loc.inode) {
+        ret = -ENOMEM;
+        goto out;
+    }
+    /* Names that do not parse as a gfid are left alone. */
+    ret = gf_uuid_parse(entry->d_name, loc.gfid);
+    if (ret) {
+        ret = 0;
+        goto out;
+    }
+    /* First pass: gfid-based lookup to learn which children hold the inode. */
+    AFR_ONLIST(local->child_up, frame, afr_selfheal_discover_cbk, lookup, &loc,
+               NULL);
+    for (i = 0; i < priv->child_count; i++) {
+        if (local->replies[i].op_ret == 0) {
+            count++;
+            gfid_present[i] = 1;
+            iatt = &local->replies[i].poststat;
+            if (iatt->ia_type == IA_IFDIR) {
+                type = "dir";
+            }
+
+            if (i == healer->subvol) {
+                if (local->replies[i].poststat.ia_nlink > 1) {
+                    multiple_links = _gf_true;
+                }
+            }
+        } else if (local->replies[i].op_errno != ENOENT &&
+                   local->replies[i].op_errno != ESTALE) {
+            /*We don't have complete view. Skip the entry*/
+            gf_msg_debug(healer->this->name, local->replies[i].op_errno,
+                         "Skipping cleanup of %s on %s", entry->d_name,
+                         subvol->name);
+            ret = 0;
+            goto out;
+        }
+    }
+
+    /*Inode is deleted from subvol*/
+    /*
+     * 'iatt' is still NULL when every child answered ENOENT/ESTALE
+     * (count == 0), so it must be checked before it is dereferenced. In that
+     * case neither branch below runs and the entry is skipped with ret == 0.
+     */
+    if (count == 1 || (iatt && iatt->ia_type != IA_IFDIR && multiple_links)) {
+        gf_msg(healer->this->name, GF_LOG_WARNING, 0,
+               AFR_MSG_EXPUNGING_FILE_OR_DIR, "expunging %s %s/%s on %s", type,
+               priv->anon_inode_name, entry->d_name, subvol->name);
+        ret = afr_shd_entry_purge(subvol, parent->inode, entry->d_name,
+                                  iatt->ia_type);
+        if (ret == -ENOENT || ret == -ESTALE)
+            ret = 0;
+    } else if (count > 1) {
+        /* Second pass: name-based lookup of the entry under 'parent'. */
+        loc_wipe(&loc);
+        loc.parent = inode_ref(parent->inode);
+        loc.name = entry->d_name;
+        loc.inode = inode_new(parent->inode->table);
+        if (!loc.inode) {
+            ret = -ENOMEM;
+            goto out;
+        }
+        AFR_ONLIST(local->child_up, frame, afr_selfheal_discover_cbk, lookup,
+                   &loc, NULL);
+        count = 0;
+        for (i = 0; i < priv->child_count; i++) {
+            if (local->replies[i].op_ret == 0) {
+                count++;
+                entry_present[i] = 1;
+                iatt = &local->replies[i].poststat;
+            } else if (local->replies[i].op_errno != ENOENT &&
+                       local->replies[i].op_errno != ESTALE) {
+                /*We don't have complete view. Skip the entry*/
+                gf_msg_debug(healer->this->name, local->replies[i].op_errno,
+                             "Skipping cleanup of %s on %s", entry->d_name,
+                             subvol->name);
+                ret = 0;
+                goto out;
+            }
+        }
+        for (i = 0; i < priv->child_count; i++) {
+            if (gfid_present[i] && !entry_present[i]) {
+                /*Entry is not anonymous on at least one subvol*/
+                gf_msg_debug(healer->this->name, 0,
+                             "Valid entry present on %s "
+                             "Skipping cleanup of %s on %s",
+                             priv->children[i]->name, entry->d_name,
+                             subvol->name);
+                ret = 0;
+                goto out;
+            }
+        }
+
+        gf_msg(healer->this->name, GF_LOG_WARNING, 0,
+               AFR_MSG_EXPUNGING_FILE_OR_DIR,
+               "expunging %s %s/%s on all subvols", type, priv->anon_inode_name,
+               entry->d_name);
+        ret = 0;
+        /* Purge on every child; only unexpected errors are accumulated. */
+        for (i = 0; i < priv->child_count; i++) {
+            op_errno = -afr_shd_entry_purge(priv->children[i], loc.parent,
+                                            entry->d_name, iatt->ia_type);
+            if (op_errno != ENOENT && op_errno != ESTALE) {
+                ret |= -op_errno;
+            }
+        }
+    }
+
+out:
+    if (frame)
+        AFR_STACK_DESTROY(frame);
+    loc_wipe(&loc);
+    return ret;
+}
+
+/*
+ * Scan the directory inode returned by afr_anon_inode_create() on the
+ * healer's subvolume and run afr_shd_anon_inode_cleaner() on each entry.
+ * Failures are not reported to the caller; the function only releases what
+ * it acquired.
+ */
+static void
+afr_cleanup_anon_inode_dir(struct subvol_healer *healer)
+{
+    afr_private_t *priv = healer->this->private;
+    call_frame_t *frame = NULL;
+    loc_t loc = {0};
+    int ret = 0;
+
+    ret = afr_anon_inode_create(healer->this, healer->subvol, &loc.inode);
+    if (ret == 0) {
+        frame = afr_frame_create(healer->this, &ret);
+        if (frame) {
+            ret = syncop_mt_dir_scan(frame, priv->children[healer->subvol],
+                                     &loc, GF_CLIENT_PID_SELF_HEALD, healer,
+                                     afr_shd_anon_inode_cleaner, NULL,
+                                     priv->shd.max_threads,
+                                     priv->shd.wait_qlength);
+            AFR_STACK_DESTROY(frame);
+        }
+    }
+
+    loc_wipe(&loc);
+}
+
void *
afr_shd_index_healer(void *data)
{
@@ -825,7 +1039,8 @@ afr_shd_index_healer(void *data)
priv->local[healer->subvol] = healer->local;
if (priv->thin_arbiter_count) {
- afr_shd_ta_get_xattrs(this, &loc, healer, &pre_crawl_xdata);
+ if (afr_shd_ta_needs_heal(this, healer))
+ afr_shd_ta_get_xattrs(this, &loc, healer, &pre_crawl_xdata);
}
do {
@@ -855,9 +1070,17 @@ afr_shd_index_healer(void *data)
sleep(1);
} while (ret > 0);
- if (pre_crawl_xdata && !healer->crawl_event.heal_failed_count) {
+ if (ret == 0) {
+ afr_cleanup_anon_inode_dir(healer);
+ }
+
+ if (ret == 0 && pre_crawl_xdata &&
+ !healer->crawl_event.heal_failed_count) {
afr_shd_ta_check_and_unset_xattrs(this, &loc, healer,
pre_crawl_xdata);
+ }
+
+ if (pre_crawl_xdata) {
dict_unref(pre_crawl_xdata);
pre_crawl_xdata = NULL;
}
@@ -975,7 +1198,9 @@ afr_shd_dict_add_crawl_event(xlator_t *this, dict_t *output,
{
int ret = 0;
uint64_t count = 0;
- char key[256] = {0};
+ char key[128] = {0};
+ int keylen = 0;
+ char suffix[64] = {0};
int xl_id = 0;
uint64_t healed_count = 0;
uint64_t split_brain_count = 0;
@@ -1010,8 +1235,8 @@ afr_shd_dict_add_crawl_event(xlator_t *this, dict_t *output,
snprintf(key, sizeof(key), "statistics-%d-%d-count", xl_id, child);
ret = dict_get_uint64(output, key, &count);
- snprintf(key, sizeof(key), "statistics_healed_cnt-%d-%d-%" PRIu64, xl_id,
- child, count);
+ snprintf(suffix, sizeof(suffix), "%d-%d-%" PRIu64, xl_id, child, count);
+ snprintf(key, sizeof(key), "statistics_healed_cnt-%s", suffix);
ret = dict_set_uint64(output, key, healed_count);
if (ret) {
gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_DICT_SET_FAILED,
@@ -1019,8 +1244,7 @@ afr_shd_dict_add_crawl_event(xlator_t *this, dict_t *output,
goto out;
}
- snprintf(key, sizeof(key), "statistics_sb_cnt-%d-%d-%" PRIu64, xl_id, child,
- count);
+ snprintf(key, sizeof(key), "statistics_sb_cnt-%s", suffix);
ret = dict_set_uint64(output, key, split_brain_count);
if (ret) {
gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_DICT_SET_FAILED,
@@ -1028,17 +1252,15 @@ afr_shd_dict_add_crawl_event(xlator_t *this, dict_t *output,
goto out;
}
- snprintf(key, sizeof(key), "statistics_crawl_type-%d-%d-%" PRIu64, xl_id,
- child, count);
- ret = dict_set_str(output, key, crawl_type);
+ keylen = snprintf(key, sizeof(key), "statistics_crawl_type-%s", suffix);
+ ret = dict_set_strn(output, key, keylen, crawl_type);
if (ret) {
gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_DICT_SET_FAILED,
"Could not add statistics_crawl_type to output");
goto out;
}
- snprintf(key, sizeof(key), "statistics_heal_failed_cnt-%d-%d-%" PRIu64,
- xl_id, child, count);
+ snprintf(key, sizeof(key), "statistics_heal_failed_cnt-%s", suffix);
ret = dict_set_uint64(output, key, heal_failed_count);
if (ret) {
gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_DICT_SET_FAILED,
@@ -1046,9 +1268,8 @@ afr_shd_dict_add_crawl_event(xlator_t *this, dict_t *output,
goto out;
}
- snprintf(key, sizeof(key), "statistics_strt_time-%d-%d-%" PRIu64, xl_id,
- child, count);
- ret = dict_set_dynstr(output, key, start_time_str);
+ keylen = snprintf(key, sizeof(key), "statistics_strt_time-%s", suffix);
+ ret = dict_set_dynstrn(output, key, keylen, start_time_str);
if (ret) {
gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_DICT_SET_FAILED,
"Could not add statistics_crawl_start_time to output");
@@ -1062,11 +1283,10 @@ afr_shd_dict_add_crawl_event(xlator_t *this, dict_t *output,
else
progress = 0;
- snprintf(key, sizeof(key), "statistics_end_time-%d-%d-%" PRIu64, xl_id,
- child, count);
+ keylen = snprintf(key, sizeof(key), "statistics_end_time-%s", suffix);
if (!end_time_str)
end_time_str = gf_strdup("Could not determine the end time");
- ret = dict_set_dynstr(output, key, end_time_str);
+ ret = dict_set_dynstrn(output, key, keylen, end_time_str);
if (ret) {
gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_DICT_SET_FAILED,
"Could not add statistics_crawl_end_time to output");
@@ -1075,10 +1295,9 @@ afr_shd_dict_add_crawl_event(xlator_t *this, dict_t *output,
end_time_str = NULL;
}
- snprintf(key, sizeof(key), "statistics_inprogress-%d-%d-%" PRIu64, xl_id,
- child, count);
+ keylen = snprintf(key, sizeof(key), "statistics_inprogress-%s", suffix);
- ret = dict_set_int32(output, key, progress);
+ ret = dict_set_int32n(output, key, keylen, progress);
if (ret) {
gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_DICT_SET_FAILED,
"Could not add statistics_inprogress to output");
@@ -1104,7 +1323,9 @@ afr_shd_dict_add_path(xlator_t *this, dict_t *output, int child, char *path,
{
int ret = -1;
uint64_t count = 0;
- char key[256] = {0};
+ char key[64] = {0};
+ int keylen = 0;
+ char xl_id_child_str[32] = {0};
int xl_id = 0;
ret = dict_get_int32(output, this->name, &xl_id);
@@ -1114,11 +1335,12 @@ afr_shd_dict_add_path(xlator_t *this, dict_t *output, int child, char *path,
goto out;
}
- snprintf(key, sizeof(key), "%d-%d-count", xl_id, child);
+ snprintf(xl_id_child_str, sizeof(xl_id_child_str), "%d-%d", xl_id, child);
+ snprintf(key, sizeof(key), "%s-count", xl_id_child_str);
ret = dict_get_uint64(output, key, &count);
- snprintf(key, sizeof(key), "%d-%d-%" PRIu64, xl_id, child, count);
- ret = dict_set_dynstr(output, key, path);
+ keylen = snprintf(key, sizeof(key), "%s-%" PRIu64, xl_id_child_str, count);
+ ret = dict_set_dynstrn(output, key, keylen, path);
if (ret) {
gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_DICT_SET_FAILED,
@@ -1127,7 +1349,7 @@ afr_shd_dict_add_path(xlator_t *this, dict_t *output, int child, char *path,
}
if (tv) {
- snprintf(key, sizeof(key), "%d-%d-%" PRIu64 "-time", xl_id, child,
+ snprintf(key, sizeof(key), "%s-%" PRIu64 "-time", xl_id_child_str,
count);
ret = dict_set_uint32(output, key, tv->tv_sec);
if (ret) {
@@ -1137,7 +1359,7 @@ afr_shd_dict_add_path(xlator_t *this, dict_t *output, int child, char *path,
}
}
- snprintf(key, sizeof(key), "%d-%d-count", xl_id, child);
+ snprintf(key, sizeof(key), "%s-count", xl_id_child_str);
ret = dict_set_uint64(output, key, count + 1);
if (ret) {
@@ -1210,10 +1432,6 @@ afr_selfheal_daemon_init(xlator_t *this)
priv = this->private;
shd = &priv->shd;
- this->itable = inode_table_new(SHD_INODE_LRU_LIMIT, this);
- if (!this->itable)
- goto out;
-
shd->index_healers = GF_CALLOC(sizeof(*shd->index_healers),
priv->child_count,
gf_afr_mt_subvol_healer_t);
@@ -1264,12 +1482,18 @@ out:
return ret;
}
-int
-afr_selfheal_childup(xlator_t *this, int subvol)
+/*
+ * Spawn an index healer for every child currently marked up in
+ * priv->child_up. Returns immediately, doing nothing, when
+ * priv->shd.iamshd is not set.
+ */
+void
+afr_selfheal_childup(xlator_t *this, afr_private_t *priv)
{
- afr_shd_index_healer_spawn(this, subvol);
+ int subvol = 0;
- return 0;
+ if (!priv->shd.iamshd)
+ return;
+ for (subvol = 0; subvol < priv->child_count; subvol++)
+ if (priv->child_up[subvol])
+ afr_shd_index_healer_spawn(this, subvol);
+
+ return;
}
int
@@ -1319,41 +1543,78 @@ afr_xl_op(xlator_t *this, dict_t *input, dict_t *output)
struct subvol_healer *healer = NULL;
int i = 0;
char key[64];
+ int keylen = 0;
+ int this_name_len = 0;
int op_ret = 0;
uint64_t cnt = 0;
+/*
+ * Store dict_str (length dict_str_len) under key (length keylen) in output
+ * and log the key/value pair against 'name' if the dict call fails.
+ *
+ * Wrapped in do { } while (0) so the expansion plus its trailing ';' is a
+ * single statement wherever it is used. The local result has its own name
+ * so it cannot shadow, or be confused with, the caller's 'ret'; the caller's
+ * variables are never modified.
+ */
+#define AFR_SET_DICT_AND_LOG(name, output, key, keylen, dict_str,              \
+                             dict_str_len)                                     \
+    do {                                                                       \
+        int set_ret_ = dict_set_nstrn(output, key, keylen, dict_str,           \
+                                      dict_str_len);                           \
+                                                                               \
+        if (set_ret_) {                                                        \
+            gf_smsg(name, GF_LOG_ERROR, -set_ret_, AFR_MSG_DICT_SET_FAILED,    \
+                    "key=%s", key, "value=%s", dict_str, NULL);                \
+        }                                                                      \
+    } while (0)
+
priv = this->private;
shd = &priv->shd;
- ret = dict_get_int32(input, "xl-op", (int32_t *)&op);
- if (ret)
+ ret = dict_get_int32_sizen(input, "xl-op", (int32_t *)&op);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_DICT_GET_FAILED,
+ "key=xl-op", NULL);
goto out;
- ret = dict_get_int32(input, this->name, &xl_id);
- if (ret)
+ }
+ this_name_len = strlen(this->name);
+ ret = dict_get_int32n(input, this->name, this_name_len, &xl_id);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_DICT_GET_FAILED,
+ "key=%s", this->name, NULL);
goto out;
- ret = dict_set_int32(output, this->name, xl_id);
- if (ret)
+ }
+ ret = dict_set_int32n(output, this->name, this_name_len, xl_id);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_DICT_SET_FAILED,
+ "key=%s", this->name, NULL);
goto out;
+ }
switch (op) {
case GF_SHD_OP_HEAL_INDEX:
op_ret = 0;
for (i = 0; i < priv->child_count; i++) {
healer = &shd->index_healers[i];
- snprintf(key, sizeof(key), "%d-%d-status", xl_id, i);
+ keylen = snprintf(key, sizeof(key), "%d-%d-status", xl_id, i);
if (!priv->child_up[i]) {
- ret = dict_set_str(output, key, "Brick is not connected");
+ AFR_SET_DICT_AND_LOG(this->name, output, key, keylen,
+ SBRICK_NOT_CONNECTED,
+ SLEN(SBRICK_NOT_CONNECTED));
op_ret = -1;
} else if (AFR_COUNT(priv->child_up, priv->child_count) < 2) {
- ret = dict_set_str(output, key,
- "< 2 bricks in replica are up");
+ AFR_SET_DICT_AND_LOG(this->name, output, key, keylen,
+ SLESS_THAN2_BRICKS_in_REP,
+ SLEN(SLESS_THAN2_BRICKS_in_REP));
op_ret = -1;
} else if (!afr_shd_is_subvol_local(this, healer->subvol)) {
- ret = dict_set_str(output, key, "Brick is remote");
+ AFR_SET_DICT_AND_LOG(this->name, output, key, keylen,
+ SBRICK_IS_REMOTE,
+ SLEN(SBRICK_IS_REMOTE));
} else {
- ret = dict_set_str(output, key, "Started self-heal");
- afr_shd_index_healer_spawn(this, i);
+ AFR_SET_DICT_AND_LOG(this->name, output, key, keylen,
+ SSTARTED_SELF_HEAL,
+ SLEN(SSTARTED_SELF_HEAL));
+
+ ret = afr_shd_index_healer_spawn(this, i);
+
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, -ret,
+ AFR_MSG_HEALER_SPAWN_FAILED, NULL);
+ }
}
}
break;
@@ -1362,18 +1623,31 @@ afr_xl_op(xlator_t *this, dict_t *input, dict_t *output)
for (i = 0; i < priv->child_count; i++) {
healer = &shd->full_healers[i];
- snprintf(key, sizeof(key), "%d-%d-status", xl_id, i);
+ keylen = snprintf(key, sizeof(key), "%d-%d-status", xl_id, i);
if (!priv->child_up[i]) {
- ret = dict_set_str(output, key, "Brick is not connected");
+ AFR_SET_DICT_AND_LOG(this->name, output, key, keylen,
+ SBRICK_NOT_CONNECTED,
+ SLEN(SBRICK_NOT_CONNECTED));
} else if (AFR_COUNT(priv->child_up, priv->child_count) < 2) {
- ret = dict_set_str(output, key,
- "< 2 bricks in replica are up");
+ AFR_SET_DICT_AND_LOG(this->name, output, key, keylen,
+ SLESS_THAN2_BRICKS_in_REP,
+ SLEN(SLESS_THAN2_BRICKS_in_REP));
} else if (!afr_shd_is_subvol_local(this, healer->subvol)) {
- ret = dict_set_str(output, key, "Brick is remote");
+ AFR_SET_DICT_AND_LOG(this->name, output, key, keylen,
+ SBRICK_IS_REMOTE,
+ SLEN(SBRICK_IS_REMOTE));
} else {
- ret = dict_set_str(output, key, "Started self-heal");
- afr_shd_full_healer_spawn(this, i);
+ AFR_SET_DICT_AND_LOG(this->name, output, key, keylen,
+ SSTARTED_SELF_HEAL,
+ SLEN(SSTARTED_SELF_HEAL));
+
+ ret = afr_shd_full_healer_spawn(this, i);
+
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, -ret,
+ AFR_MSG_HEALER_SPAWN_FAILED, NULL);
+ }
op_ret = 0;
}
}
@@ -1381,25 +1655,25 @@ afr_xl_op(xlator_t *this, dict_t *input, dict_t *output)
case GF_SHD_OP_INDEX_SUMMARY:
/* this case has been handled in glfs-heal.c */
break;
- case GF_SHD_OP_HEALED_FILES:
- case GF_SHD_OP_HEAL_FAILED_FILES:
- for (i = 0; i < priv->child_count; i++) {
- snprintf(key, sizeof(key), "%d-%d-status", xl_id, i);
- ret = dict_set_str(output, key,
- "Operation Not "
- "Supported");
- }
- break;
case GF_SHD_OP_SPLIT_BRAIN_FILES:
eh_dump(shd->split_brain, output, afr_add_shd_event);
break;
case GF_SHD_OP_STATISTICS:
for (i = 0; i < priv->child_count; i++) {
eh_dump(shd->statistics[i], output, afr_add_crawl_event);
- afr_shd_dict_add_crawl_event(
+ ret = afr_shd_dict_add_crawl_event(
this, output, &shd->index_healers[i].crawl_event);
- afr_shd_dict_add_crawl_event(this, output,
- &shd->full_healers[i].crawl_event);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, -ret,
+ AFR_MSG_ADD_CRAWL_EVENT_FAILED, NULL);
+ }
+
+ ret = afr_shd_dict_add_crawl_event(
+ this, output, &shd->full_healers[i].crawl_event);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, -ret,
+ AFR_MSG_ADD_CRAWL_EVENT_FAILED, NULL);
+ }
}
break;
case GF_SHD_OP_STATISTICS_HEAL_COUNT:
@@ -1408,14 +1682,21 @@ afr_xl_op(xlator_t *this, dict_t *input, dict_t *output)
for (i = 0; i < priv->child_count; i++) {
if (!priv->child_up[i]) {
- snprintf(key, sizeof(key), "%d-%d-status", xl_id, i);
- ret = dict_set_str(output, key, "Brick is not connected");
+ keylen = snprintf(key, sizeof(key), "%d-%d-status", xl_id,
+ i);
+ AFR_SET_DICT_AND_LOG(this->name, output, key, keylen,
+ SBRICK_NOT_CONNECTED,
+ SLEN(SBRICK_NOT_CONNECTED));
} else {
snprintf(key, sizeof(key), "%d-%d-hardlinks", xl_id, i);
ret = afr_shd_get_index_count(this, i, &cnt);
if (ret == 0) {
ret = dict_set_uint64(output, key, cnt);
}
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, -ret,
+ AFR_MSG_DICT_SET_FAILED, NULL);
+ }
op_ret = 0;
}
}
@@ -1423,11 +1704,13 @@ afr_xl_op(xlator_t *this, dict_t *input, dict_t *output)
break;
default:
- gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_INVALID_ARG,
- "Unknown set op %d", op);
+ gf_smsg(this->name, GF_LOG_ERROR, 0, AFR_MSG_INVALID_ARG, "op=%d",
+ op, NULL);
break;
}
out:
- dict_del(output, this->name);
+ dict_deln(output, this->name, this_name_len);
return op_ret;
+
+#undef AFR_SET_DICT_AND_LOG
}
diff --git a/xlators/cluster/afr/src/afr-self-heald.h b/xlators/cluster/afr/src/afr-self-heald.h
index 7de7c431460..18db728ea7b 100644
--- a/xlators/cluster/afr/src/afr-self-heald.h
+++ b/xlators/cluster/afr/src/afr-self-heald.h
@@ -14,12 +14,11 @@
#include <pthread.h>
typedef struct {
- int child;
char *path;
+ int child;
} shd_event_t;
typedef struct {
- int child;
uint64_t healed_count;
uint64_t split_brain_count;
uint64_t heal_failed_count;
@@ -31,38 +30,36 @@ typedef struct {
cralwer is in progress */
time_t end_time;
char *crawl_type;
+ int child;
} crawl_event_t;
+/*
+ * Per-child healer state.
+ *
+ * 'subvol' is used as an index into the children array of the AFR private
+ * data. 'mutex' and 'cond' are what the healer thread waits on
+ * (pthread_cond_timedwait) until 'rerun' is set. 'crawl_event' holds the
+ * counters and start/end times of the current crawl.
+ *
+ * NOTE(review): this change only moves the int/boolean members behind the
+ * larger ones, presumably to reduce padding -- confirm.
+ */
struct subvol_healer {
xlator_t *this;
- int subvol;
- gf_boolean_t local;
- gf_boolean_t running;
- gf_boolean_t rerun;
crawl_event_t crawl_event;
pthread_mutex_t mutex;
pthread_cond_t cond;
pthread_t thread;
+ int subvol;
+ gf_boolean_t local;
+ gf_boolean_t running;
+ gf_boolean_t rerun;
};
+/*
+ * Self-heal daemon state, reached as priv->shd.
+ *
+ * index_healers / full_healers: arrays indexed by child number.
+ * split_brain, statistics[child]: event histories dumped with eh_dump().
+ * timeout: seconds added to the current time to bound a healer's wait.
+ * max_threads, wait_qlength: passed through to syncop_mt_dir_scan().
+ * enabled: when clear, the full-heal callback returns -EBUSY.
+ * iamshd: when clear, afr_selfheal_childup() spawns no healers.
+ *
+ * NOTE(review): the members are only reordered here, presumably to reduce
+ * padding -- confirm.
+ */
typedef struct {
- gf_boolean_t iamshd;
- gf_boolean_t enabled;
- int timeout;
struct subvol_healer *index_healers;
struct subvol_healer *full_healers;
eh_t *split_brain;
eh_t **statistics;
+ int timeout;
uint32_t max_threads;
uint32_t wait_qlength;
uint32_t halo_max_latency_msec;
+ gf_boolean_t iamshd;
+ gf_boolean_t enabled;
} afr_self_heald_t;
int
-afr_selfheal_childup(xlator_t *this, int subvol);
-
-int
afr_selfheal_daemon_init(xlator_t *this);
int
@@ -73,6 +70,6 @@ afr_shd_gfid_to_path(xlator_t *this, xlator_t *subvol, uuid_t gfid,
char **path_p);
int
-afr_shd_index_purge(xlator_t *subvol, inode_t *inode, char *name,
+afr_shd_entry_purge(xlator_t *subvol, inode_t *inode, char *name,
ia_type_t type);
#endif /* !_AFR_SELF_HEALD_H */
diff --git a/xlators/cluster/afr/src/afr-transaction.c b/xlators/cluster/afr/src/afr-transaction.c
index fb78c198d9c..a51f79b1f43 100644
--- a/xlators/cluster/afr/src/afr-transaction.c
+++ b/xlators/cluster/afr/src/afr-transaction.c
@@ -8,10 +8,10 @@
cases as published by the Free Software Foundation.
*/
-#include "dict.h"
-#include "byte-order.h"
-#include "common-utils.h"
-#include "timer.h"
+#include <glusterfs/dict.h>
+#include <glusterfs/byte-order.h>
+#include <glusterfs/common-utils.h>
+#include <glusterfs/timer.h>
#include "afr.h"
#include "afr-transaction.h"
@@ -28,6 +28,12 @@ typedef enum {
static void
afr_lock_resume_shared(struct list_head *list);
+/*
+ * Forward declarations for the post-op helpers that afr_ta_process_onwireq()
+ * calls for each dequeued transaction: success when the entry's failed
+ * subvolume matches the known bad child, failure (with EIO) otherwise.
+ */
+static void
+afr_post_op_handle_success(call_frame_t *frame, xlator_t *this);
+
+static void
+afr_post_op_handle_failure(call_frame_t *frame, xlator_t *this, int op_errno);
+
void
__afr_transaction_wake_shared(afr_local_t *local, struct list_head *shared);
@@ -68,6 +74,14 @@ afr_changelog_post_op_done(call_frame_t *frame, xlator_t *this);
static void
afr_changelog_post_op_fail(call_frame_t *frame, xlator_t *this, int op_errno);
+/*
+ * Reset the thin-arbiter notify-lock bookkeeping in 'priv': no lock offset,
+ * no pending release request, bad child unknown. The caller visible in this
+ * file invokes it between LOCK(&priv->lock) and UNLOCK(&priv->lock).
+ */
+void
+afr_ta_locked_priv_invalidate(afr_private_t *priv)
+{
+    priv->ta_notify_dom_lock_offset = 0;
+    priv->release_ta_notify_dom_lock = _gf_false;
+    priv->ta_bad_child_index = AFR_CHILD_UNKNOWN;
+}
+
static void
afr_ta_process_waitq(xlator_t *this)
{
@@ -110,9 +124,9 @@ afr_release_notify_lock_for_ta(void *opaque)
this = (xlator_t *)opaque;
priv = this->private;
- ret = afr_fill_ta_loc(this, &loc);
+ ret = afr_fill_ta_loc(this, &loc, _gf_true);
if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, ENOMEM, AFR_MSG_THIN_ARB,
+ gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_THIN_ARB,
"Failed to populate loc for thin-arbiter.");
goto out;
}
@@ -121,17 +135,16 @@ afr_release_notify_lock_for_ta(void *opaque)
flock.l_len = 1;
ret = syncop_inodelk(priv->children[THIN_ARBITER_BRICK_INDEX],
AFR_TA_DOM_NOTIFY, &loc, F_SETLK, &flock, NULL, NULL);
- if (!ret) {
- LOCK(&priv->lock);
- priv->ta_bad_child_index = AFR_CHILD_UNKNOWN;
- priv->release_ta_notify_dom_lock = _gf_false;
- priv->ta_notify_dom_lock_offset = 0;
- UNLOCK(&priv->lock);
- } else {
+ if (ret) {
gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_THIN_ARB,
"Failed to unlock AFR_TA_DOM_NOTIFY lock.");
}
+ LOCK(&priv->lock);
+ {
+ afr_ta_locked_priv_invalidate(priv);
+ }
+ UNLOCK(&priv->lock);
out:
loc_wipe(&loc);
return ret;
@@ -242,7 +255,7 @@ afr_changelog_has_quorum(afr_local_t *local, xlator_t *this)
}
}
- if (afr_has_quorum(success_children, this)) {
+ if (afr_has_quorum(success_children, this, NULL)) {
return _gf_true;
}
@@ -366,6 +379,8 @@ afr_transaction_done(call_frame_t *frame, xlator_t *this)
}
local->transaction.unwind(frame, this);
+ GF_ASSERT(list_empty(&local->transaction.owner_list));
+ GF_ASSERT(list_empty(&local->transaction.wait_list));
AFR_STACK_DESTROY(frame);
return 0;
@@ -387,7 +402,7 @@ afr_lock_fail_shared(afr_local_t *local, struct list_head *list)
}
static void
-afr_handle_lock_acquire_failure(afr_local_t *local, gf_boolean_t locked)
+afr_handle_lock_acquire_failure(afr_local_t *local)
{
struct list_head shared;
afr_lock_t *lock = NULL;
@@ -408,13 +423,8 @@ afr_handle_lock_acquire_failure(afr_local_t *local, gf_boolean_t locked)
afr_lock_fail_shared(local, &shared);
local->transaction.do_eager_unlock = _gf_true;
out:
- if (locked) {
- local->internal_lock.lock_cbk = afr_transaction_done;
- afr_unlock(local->transaction.frame, local->transaction.frame->this);
- } else {
- afr_transaction_done(local->transaction.frame,
- local->transaction.frame->this);
- }
+ local->internal_lock.lock_cbk = afr_transaction_done;
+ afr_unlock(local->transaction.frame, local->transaction.frame->this);
}
call_frame_t *
@@ -511,42 +521,6 @@ afr_compute_pre_op_sources(call_frame_t *frame, xlator_t *this)
local->transaction.pre_op_sources[j] = 0;
}
-gf_boolean_t
-afr_has_arbiter_fop_cbk_quorum(call_frame_t *frame)
-{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- xlator_t *this = NULL;
- gf_boolean_t fop_failed = _gf_false;
- unsigned char *pre_op_sources = NULL;
- int i = 0;
-
- local = frame->local;
- this = frame->this;
- priv = this->private;
- pre_op_sources = local->transaction.pre_op_sources;
-
- /* If the fop failed on the brick, it is not a source. */
- for (i = 0; i < priv->child_count; i++)
- if (local->transaction.failed_subvols[i])
- pre_op_sources[i] = 0;
-
- switch (AFR_COUNT(pre_op_sources, priv->child_count)) {
- case 1:
- if (pre_op_sources[ARBITER_BRICK_INDEX])
- fop_failed = _gf_true;
- break;
- case 0:
- fop_failed = _gf_true;
- break;
- }
-
- if (fop_failed)
- return _gf_false;
-
- return _gf_true;
-}
-
void
afr_txn_arbitrate_fop(call_frame_t *frame, xlator_t *this)
{
@@ -613,7 +587,7 @@ afr_transaction_perform_fop(call_frame_t *frame, xlator_t *this)
failure_count = AFR_COUNT(local->transaction.failed_subvols,
priv->child_count);
if (failure_count == priv->child_count) {
- afr_handle_lock_acquire_failure(local, _gf_true);
+ afr_handle_lock_acquire_failure(local);
return 0;
} else {
lock = &local->inode_ctx->lock[local->transaction.type];
@@ -660,8 +634,9 @@ afr_set_pending_dict(afr_private_t *priv, dict_t *xattr, int **pending)
return ret;
}
+
static void
-afr_ta_dom_lock_check_and_release(afr_local_t *local, xlator_t *this)
+afr_ta_dom_lock_check_and_release(afr_ta_fop_state_t fop_state, xlator_t *this)
{
afr_private_t *priv = this->private;
unsigned int inmem_count = 0;
@@ -671,17 +646,25 @@ afr_ta_dom_lock_check_and_release(afr_local_t *local, xlator_t *this)
LOCK(&priv->lock);
{
/*Once we get notify lock release upcall notification,
- if two fop states are non empty/non zero, we will
- not release lock.
- 1 - If anything in memory txn
- 2 - If anything in onwire or onwireq
+ if any of the fop state counters are non-zero, we will
+ not release the lock.
*/
- if (local->fop_state == TA_INFO_IN_MEMORY_SUCCESS) {
- inmem_count = --priv->ta_in_mem_txn_count;
- } else {
- inmem_count = priv->ta_in_mem_txn_count;
- }
onwire_count = priv->ta_on_wire_txn_count;
+ inmem_count = priv->ta_in_mem_txn_count;
+ switch (fop_state) {
+ case TA_GET_INFO_FROM_TA_FILE:
+ onwire_count = --priv->ta_on_wire_txn_count;
+ break;
+ case TA_INFO_IN_MEMORY_SUCCESS:
+ case TA_INFO_IN_MEMORY_FAILED:
+ inmem_count = --priv->ta_in_mem_txn_count;
+ break;
+ case TA_WAIT_FOR_NOTIFY_LOCK_REL:
+ GF_ASSERT(0);
+ break;
+ case TA_SUCCESS:
+ break;
+ }
release = priv->release_ta_notify_dom_lock;
}
UNLOCK(&priv->lock);
@@ -693,7 +676,7 @@ afr_ta_dom_lock_check_and_release(afr_local_t *local, xlator_t *this)
}
static void
-afr_ta_process_onwireq(afr_local_t *local, xlator_t *this)
+afr_ta_process_onwireq(afr_ta_fop_state_t fop_state, xlator_t *this)
{
afr_private_t *priv = this->private;
afr_local_t *entry = NULL;
@@ -706,15 +689,6 @@ afr_ta_process_onwireq(afr_local_t *local, xlator_t *this)
LOCK(&priv->lock);
{
- if (--priv->ta_on_wire_txn_count == 0) {
- UNLOCK(&priv->lock);
- /*Only one write fop came and after taking notify
- *lock and before doing xattrop, it has received
- *lock contention upcall, so this is the only place
- *to find this out and release the lock*/
- afr_ta_dom_lock_check_and_release(local, this);
- return;
- }
bad_child = priv->ta_bad_child_index;
if (bad_child == AFR_CHILD_UNKNOWN) {
/*The previous on-wire ta_post_op was a failure. Just dequeue
@@ -735,13 +709,10 @@ afr_ta_process_onwireq(afr_local_t *local, xlator_t *this)
while (!list_empty(&onwireq)) {
entry = list_entry(onwireq.next, afr_local_t, ta_onwireq);
list_del_init(&entry->ta_onwireq);
- LOCK(&priv->lock);
- --priv->ta_on_wire_txn_count;
- UNLOCK(&priv->lock);
if (entry->ta_failed_subvol == bad_child) {
- afr_changelog_post_op_do(entry->transaction.frame, this);
+ afr_post_op_handle_success(entry->transaction.frame, this);
} else {
- afr_changelog_post_op_fail(entry->transaction.frame, this, EIO);
+ afr_post_op_handle_failure(entry->transaction.frame, this, EIO);
}
}
}
@@ -760,7 +731,7 @@ afr_changelog_post_op_done(call_frame_t *frame, xlator_t *this)
if (priv->thin_arbiter_count) {
/*fop should not come here with TA_WAIT_FOR_NOTIFY_LOCK_REL state */
- afr_ta_dom_lock_check_and_release(frame->local, this);
+ afr_ta_dom_lock_check_and_release(local->fop_state, this);
}
/* Fail the FOP if post-op did not succeed on quorum no. of bricks. */
@@ -797,21 +768,9 @@ afr_changelog_post_op_fail(call_frame_t *frame, xlator_t *this, int op_errno)
+/*
+ * Return the locked_nodes array of the first lockee in 'int_lock'.
+ * 'type' is kept in the signature but is no longer consulted.
+ */
unsigned char *
afr_locked_nodes_get(afr_transaction_type type, afr_internal_lock_t *int_lock)
{
- unsigned char *locked_nodes = NULL;
- switch (type) {
- case AFR_DATA_TRANSACTION:
- case AFR_METADATA_TRANSACTION:
- locked_nodes = int_lock->locked_nodes;
- break;
-
- case AFR_ENTRY_TRANSACTION:
- case AFR_ENTRY_RENAME_TRANSACTION:
- /*Because same set of subvols participate in all lockee
- * entities*/
- locked_nodes = int_lock->lockee[0].locked_nodes;
- break;
- }
- return locked_nodes;
+ /*Because same set of subvols participate in all lockee
+ * entities*/
+ return int_lock->lockee[0].locked_nodes;
}
int
@@ -869,7 +828,7 @@ afr_handle_symmetric_errors(call_frame_t *frame, xlator_t *this)
}
gf_boolean_t
-afr_has_quorum(unsigned char *subvols, xlator_t *this)
+afr_has_quorum(unsigned char *subvols, xlator_t *this, call_frame_t *frame)
{
unsigned int quorum_count = 0;
afr_private_t *priv = NULL;
@@ -878,6 +837,9 @@ afr_has_quorum(unsigned char *subvols, xlator_t *this)
priv = this->private;
up_children_count = AFR_COUNT(subvols, priv->child_count);
+ if (afr_lookup_has_quorum(frame, up_children_count))
+ return _gf_true;
+
if (priv->quorum_count == AFR_QUORUM_AUTO) {
/*
* Special case for auto-quorum with an even number of nodes.
@@ -932,7 +894,7 @@ afr_has_fop_quorum(call_frame_t *frame)
locked_nodes = afr_locked_nodes_get(local->transaction.type,
&local->internal_lock);
- return afr_has_quorum(locked_nodes, this);
+ return afr_has_quorum(locked_nodes, this, NULL);
}
static gf_boolean_t
@@ -950,7 +912,7 @@ afr_has_fop_cbk_quorum(call_frame_t *frame)
success[i] = 1;
}
- return afr_has_quorum(success, this);
+ return afr_has_quorum(success, this, NULL);
}
gf_boolean_t
@@ -973,12 +935,8 @@ afr_need_dirty_marking(call_frame_t *frame, xlator_t *this)
priv->child_count)
return _gf_false;
- if (priv->arbiter_count) {
- if (!afr_has_arbiter_fop_cbk_quorum(frame))
- need_dirty = _gf_true;
- } else if (!afr_has_fop_cbk_quorum(frame)) {
+ if (!afr_has_fop_cbk_quorum(frame))
need_dirty = _gf_true;
- }
return need_dirty;
}
@@ -1028,12 +986,8 @@ afr_handle_quorum(call_frame_t *frame, xlator_t *this)
* no split-brain with the fix. The problem is eliminated completely.
*/
- if (priv->arbiter_count) {
- if (afr_has_arbiter_fop_cbk_quorum(frame))
- return;
- } else if (afr_has_fop_cbk_quorum(frame)) {
+ if (afr_has_fop_cbk_quorum(frame))
return;
- }
if (afr_need_dirty_marking(frame, this))
goto set_response;
@@ -1075,7 +1029,7 @@ set_response:
}
int
-afr_fill_ta_loc(xlator_t *this, loc_t *loc)
+afr_fill_ta_loc(xlator_t *this, loc_t *loc, gf_boolean_t is_gfid_based_fop)
{
afr_private_t *priv = NULL;
@@ -1083,6 +1037,11 @@ afr_fill_ta_loc(xlator_t *this, loc_t *loc)
loc->parent = inode_ref(priv->root_inode);
gf_uuid_copy(loc->pargfid, loc->parent->gfid);
loc->name = priv->pending_key[THIN_ARBITER_BRICK_INDEX];
+ if (is_gfid_based_fop && gf_uuid_is_null(priv->ta_gfid)) {
+ /* All gluster FOPs need the gfid; the only exception is
+ * afr_ta_id_file_check(), which is path-based. */
+ return -EINVAL;
+ }
gf_uuid_copy(loc->gfid, priv->ta_gfid);
loc->inode = inode_new(loc->parent->table);
if (!loc->inode) {
@@ -1092,99 +1051,76 @@ afr_fill_ta_loc(xlator_t *this, loc_t *loc)
return 0;
}
-int
-afr_changelog_thin_arbiter_post_op(xlator_t *this, afr_local_t *local)
+static int
+afr_ta_post_op_done(int ret, call_frame_t *frame, void *opaque)
{
- int ret = 0;
- afr_private_t *priv = NULL;
- dict_t *xattr = NULL;
- int failed_count = 0;
- struct gf_flock flock = {
- 0,
- };
- loc_t loc = {
- 0,
- };
- int i = 0;
-
- priv = this->private;
- if (!priv->thin_arbiter_count)
- return 0;
-
- failed_count = AFR_COUNT(local->transaction.failed_subvols,
- priv->child_count);
- if (!failed_count)
- return 0;
+ xlator_t *this = NULL;
+ afr_local_t *local = NULL;
+ call_frame_t *txn_frame = NULL;
+ afr_ta_fop_state_t fop_state;
- GF_ASSERT(failed_count == 1);
- ret = afr_fill_ta_loc(this, &loc);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_THIN_ARB,
- "Failed to populate thin-arbiter loc for: %s.", loc.name);
- goto out;
- }
+ local = (afr_local_t *)opaque;
+ fop_state = local->fop_state;
+ txn_frame = local->transaction.frame;
+ this = frame->this;
- xattr = dict_new();
- if (!xattr) {
- ret = -ENOMEM;
- goto out;
- }
- for (i = 0; i < priv->child_count; i++) {
- ret = dict_set_static_bin(xattr, priv->pending_key[i],
- local->pending[i],
- AFR_NUM_CHANGE_LOGS * sizeof(int));
- if (ret)
- goto out;
+ if (ret == 0) {
+ /*Mark pending xattrs on the up data brick.*/
+ afr_post_op_handle_success(txn_frame, this);
+ } else {
+ afr_post_op_handle_failure(txn_frame, this, -ret);
}
- flock.l_type = F_WRLCK;
- flock.l_start = 0;
- flock.l_len = 0;
-
- /*TODO: Convert to two domain locking. */
- ret = syncop_inodelk(priv->children[THIN_ARBITER_BRICK_INDEX],
- AFR_TA_DOM_NOTIFY, &loc, F_SETLKW, &flock, NULL, NULL);
- if (ret)
- goto out;
+ STACK_DESTROY(frame->root);
+ afr_ta_process_onwireq(fop_state, this);
- ret = syncop_xattrop(priv->children[THIN_ARBITER_BRICK_INDEX], &loc,
- GF_XATTROP_ADD_ARRAY, xattr, NULL, NULL, NULL);
+ return 0;
+}
- if (ret == -EINVAL) {
- gf_msg(this->name, GF_LOG_INFO, -ret, AFR_MSG_THIN_ARB,
- "Thin-arbiter has denied post-op on %s for gfid %s.",
- priv->pending_key[THIN_ARBITER_BRICK_INDEX],
- uuid_utoa(local->inode->gfid));
+int **
+afr_set_changelog_xattr(afr_private_t *priv, unsigned char *pending,
+ dict_t *xattr, afr_local_t *local)
+{
+ int **changelog = NULL;
+ int idx = 0;
+ int ret = 0;
+ int i;
- } else if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_THIN_ARB,
- "Post-op on thin-arbiter id file %s failed for gfid %s.",
- priv->pending_key[THIN_ARBITER_BRICK_INDEX],
- uuid_utoa(local->inode->gfid));
+ if (local->is_new_entry == _gf_true) {
+ changelog = afr_mark_pending_changelog(priv, pending, xattr,
+ local->cont.dir_fop.buf.ia_type);
+ } else {
+ idx = afr_index_for_transaction_type(local->transaction.type);
+ changelog = afr_matrix_create(priv->child_count, AFR_NUM_CHANGE_LOGS);
+ if (!changelog) {
+ goto out;
+ }
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->transaction.failed_subvols[i])
+ changelog[i][idx] = hton32(1);
+ }
+ ret = afr_set_pending_dict(priv, xattr, changelog);
+ if (ret < 0) {
+ afr_matrix_cleanup(changelog, priv->child_count);
+ return NULL;
+ }
}
- flock.l_type = F_UNLCK;
- syncop_inodelk(priv->children[THIN_ARBITER_BRICK_INDEX], AFR_TA_DOM_NOTIFY,
- &loc, F_SETLK, &flock, NULL, NULL);
-out:
- if (xattr)
- dict_unref(xattr);
- return ret;
+out:
+ return changelog;
}
-static int
-afr_ta_post_op_done(int ret, call_frame_t *frame, void *opaque)
+static void
+afr_ta_locked_xattrop_validate(afr_private_t *priv, afr_local_t *local,
+ gf_boolean_t *valid)
{
- xlator_t *this = NULL;
- afr_local_t *local = NULL;
-
- local = (afr_local_t *)opaque;
- this = frame->this;
-
- STACK_DESTROY(frame->root);
- afr_ta_process_onwireq(local, this);
-
- return 0;
+ if (priv->ta_event_gen > local->ta_event_gen) {
+ /* We can't trust the ta's response anymore.*/
+ afr_ta_locked_priv_invalidate(priv);
+ *valid = _gf_false;
+ return;
+ }
+ return;
}
static int
@@ -1193,27 +1129,25 @@ afr_ta_post_op_do(void *opaque)
afr_local_t *local = NULL;
afr_private_t *priv = NULL;
xlator_t *this = NULL;
- call_frame_t *txn_frame = NULL;
dict_t *xattr = NULL;
- int **pending = NULL;
+ unsigned char *pending = NULL;
+ int **changelog = NULL;
int failed_subvol = -1;
int success_subvol = -1;
loc_t loc = {
0,
};
- int idx = 0;
int i = 0;
int ret = 0;
+ gf_boolean_t valid = _gf_true;
local = (afr_local_t *)opaque;
- txn_frame = local->transaction.frame;
- this = txn_frame->this;
+ this = local->transaction.frame->this;
priv = this->private;
- idx = afr_index_for_transaction_type(local->transaction.type);
- ret = afr_fill_ta_loc(this, &loc);
+ ret = afr_fill_ta_loc(this, &loc, _gf_true);
if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, ENOMEM, AFR_MSG_THIN_ARB,
+ gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_THIN_ARB,
"Failed to populate loc for thin-arbiter.");
goto out;
}
@@ -1224,23 +1158,23 @@ afr_ta_post_op_do(void *opaque)
goto out;
}
- pending = afr_matrix_create(priv->child_count, AFR_NUM_CHANGE_LOGS);
- if (!pending) {
- ret = -ENOMEM;
- goto out;
- }
+ pending = alloca0(priv->child_count);
+
for (i = 0; i < priv->child_count; i++) {
if (local->transaction.failed_subvols[i]) {
- pending[i][idx] = hton32(1);
+ pending[i] = 1;
failed_subvol = i;
} else {
success_subvol = i;
}
}
- ret = afr_set_pending_dict(priv, xattr, pending);
- if (ret < 0)
+ changelog = afr_set_changelog_xattr(priv, pending, xattr, local);
+
+ if (!changelog) {
+ ret = -ENOMEM;
goto out;
+ }
ret = afr_ta_post_op_lock(this, &loc);
if (ret)
@@ -1248,22 +1182,31 @@ afr_ta_post_op_do(void *opaque)
ret = syncop_xattrop(priv->children[THIN_ARBITER_BRICK_INDEX], &loc,
GF_XATTROP_ADD_ARRAY, xattr, NULL, NULL, NULL);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_THIN_ARB,
+ "Post-op on thin-arbiter id file %s failed for gfid %s.",
+ priv->pending_key[THIN_ARBITER_BRICK_INDEX],
+ uuid_utoa(local->inode->gfid));
+ }
LOCK(&priv->lock);
{
if (ret == 0) {
priv->ta_bad_child_index = failed_subvol;
} else if (ret == -EINVAL) {
priv->ta_bad_child_index = success_subvol;
+ ret = -EIO; /* TA failed the fop. Return EIO to application. */
}
+
+ afr_ta_locked_xattrop_validate(priv, local, &valid);
}
UNLOCK(&priv->lock);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_THIN_ARB,
- "Post-op on thin-arbiter id file %s failed for gfid %s.",
+ if (valid == _gf_false) {
+ gf_msg(this->name, GF_LOG_ERROR, EIO, AFR_MSG_THIN_ARB,
+ "Post-op on thin-arbiter id file %s for gfid %s invalidated due "
+ "to event-gen mismatch.",
priv->pending_key[THIN_ARBITER_BRICK_INDEX],
uuid_utoa(local->inode->gfid));
- if (ret == -EINVAL)
- ret = -EIO; /* TA failed the fop. Return EIO to application. */
+ ret = -EIO;
}
afr_ta_post_op_unlock(this, &loc);
@@ -1271,17 +1214,11 @@ out:
if (xattr)
dict_unref(xattr);
- if (pending)
- afr_matrix_cleanup(pending, priv->child_count);
+ if (changelog)
+ afr_matrix_cleanup(changelog, priv->child_count);
loc_wipe(&loc);
- if (ret == 0) {
- /*Mark pending xattrs on the up data brick.*/
- afr_changelog_post_op_do(local->transaction.frame, this);
- } else {
- afr_changelog_post_op_fail(local->transaction.frame, this, -ret);
- }
return ret;
}
@@ -1340,6 +1277,7 @@ afr_ta_set_fop_state(afr_private_t *priv, afr_local_t *local,
/* Post-op on TA not needed as the fop succeeded only on the
* in-memory bad data brick and not the good one. Fail the fop.*/
local->fop_state = TA_INFO_IN_MEMORY_FAILED;
+ priv->ta_in_mem_txn_count++;
}
}
UNLOCK(&priv->lock);
@@ -1359,6 +1297,28 @@ afr_ta_fill_failed_subvol(afr_private_t *priv, afr_local_t *local)
}
static void
+afr_post_op_handle_success(call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+
+ local = frame->local;
+ if (local->is_new_entry == _gf_true) {
+ afr_mark_new_entry_changelog(frame, this);
+ }
+ afr_changelog_post_op_do(frame, this);
+
+ return;
+}
+
+static void
+afr_post_op_handle_failure(call_frame_t *frame, xlator_t *this, int op_errno)
+{
+ afr_changelog_post_op_fail(frame, this, op_errno);
+
+ return;
+}
+
+static void
afr_ta_decide_post_op_state(call_frame_t *frame, xlator_t *this)
{
afr_private_t *priv = NULL;
@@ -1380,10 +1340,12 @@ afr_ta_decide_post_op_state(call_frame_t *frame, xlator_t *this)
/*Post releasing the notify lock, we will act on this queue*/
break;
case TA_INFO_IN_MEMORY_SUCCESS:
- afr_changelog_post_op_do(frame, this);
+ afr_post_op_handle_success(frame, this);
break;
case TA_INFO_IN_MEMORY_FAILED:
- afr_changelog_post_op_fail(frame, this, EIO);
+ afr_post_op_handle_failure(frame, this, EIO);
+ break;
+ default:
break;
}
return;
@@ -1713,6 +1675,7 @@ afr_changelog_populate_xdata(call_frame_t *frame, afr_xattrop_type_t op,
int i = 0;
int ret = 0;
char *key = NULL;
+ int keylen = 0;
const char *name = NULL;
dict_t *xdata1 = NULL;
dict_t *xdata2 = NULL;
@@ -1770,7 +1733,8 @@ afr_changelog_populate_xdata(call_frame_t *frame, afr_xattrop_type_t op,
}
if (need_entry_key_set) {
- ret = dict_set_str(xdata1, key, (char *)name);
+ keylen = strlen(key);
+ ret = dict_set_strn(xdata1, key, keylen, (char *)name);
if (ret)
gf_msg(THIS->name, GF_LOG_ERROR, 0, AFR_MSG_DICT_SET_FAILED,
"%s/%s: Could not set %s key during xattrop",
@@ -1780,7 +1744,8 @@ afr_changelog_populate_xdata(call_frame_t *frame, afr_xattrop_type_t op,
if (!xdata2)
goto out;
- ret = dict_set_str(xdata2, key, (char *)local->newloc.name);
+ ret = dict_set_strn(xdata2, key, keylen,
+ (char *)local->newloc.name);
if (ret)
gf_msg(THIS->name, GF_LOG_ERROR, 0, AFR_MSG_DICT_SET_FAILED,
"%s/%s: Could not set %s key during "
@@ -2042,7 +2007,7 @@ err:
local->op_ret = -1;
local->op_errno = op_errno;
- afr_handle_lock_acquire_failure(local, _gf_true);
+ afr_handle_lock_acquire_failure(local);
if (xdata_req)
dict_unref(xdata_req);
@@ -2051,7 +2016,7 @@ err:
}
int
-afr_post_nonblocking_inodelk_cbk(call_frame_t *frame, xlator_t *this)
+afr_post_nonblocking_lock_cbk(call_frame_t *frame, xlator_t *this)
{
afr_internal_lock_t *int_lock = NULL;
afr_local_t *local = NULL;
@@ -2062,36 +2027,12 @@ afr_post_nonblocking_inodelk_cbk(call_frame_t *frame, xlator_t *this)
/* Initiate blocking locks if non-blocking has failed */
if (int_lock->lock_op_ret < 0) {
gf_msg_debug(this->name, 0,
- "Non blocking inodelks failed. Proceeding to blocking");
+ "Non blocking locks failed. Proceeding to blocking");
int_lock->lock_cbk = afr_internal_lock_finish;
afr_blocking_lock(frame, this);
} else {
gf_msg_debug(this->name, 0,
- "Non blocking inodelks done. Proceeding to FOP");
- afr_internal_lock_finish(frame, this);
- }
-
- return 0;
-}
-
-int
-afr_post_nonblocking_entrylk_cbk(call_frame_t *frame, xlator_t *this)
-{
- afr_internal_lock_t *int_lock = NULL;
- afr_local_t *local = NULL;
-
- local = frame->local;
- int_lock = &local->internal_lock;
-
- /* Initiate blocking locks if non-blocking has failed */
- if (int_lock->lock_op_ret < 0) {
- gf_msg_debug(this->name, 0,
- "Non blocking entrylks failed. Proceeding to blocking");
- int_lock->lock_cbk = afr_internal_lock_finish;
- afr_blocking_lock(frame, this);
- } else {
- gf_msg_debug(this->name, 0,
- "Non blocking entrylks done. Proceeding to FOP");
+ "Non blocking locks done. Proceeding to FOP");
afr_internal_lock_finish(frame, this);
}
@@ -2109,7 +2050,7 @@ afr_post_blocking_rename_cbk(call_frame_t *frame, xlator_t *this)
int_lock = &local->internal_lock;
if (int_lock->lock_op_ret < 0) {
- gf_msg(this->name, GF_LOG_INFO, 0, AFR_MSG_BLOCKING_LKS_FAILED,
+ gf_msg(this->name, GF_LOG_INFO, 0, AFR_MSG_INTERNAL_LKS_FAILED,
"Blocking entrylks failed.");
afr_transaction_done(frame, this);
@@ -2140,25 +2081,26 @@ afr_post_lower_unlock_cbk(call_frame_t *frame, xlator_t *this)
}
int
-afr_set_transaction_flock(xlator_t *this, afr_local_t *local)
+afr_set_transaction_flock(xlator_t *this, afr_local_t *local,
+ afr_lockee_t *lockee)
{
- afr_internal_lock_t *int_lock = NULL;
afr_private_t *priv = NULL;
+ struct gf_flock *flock = NULL;
- int_lock = &local->internal_lock;
priv = this->private;
+ flock = &lockee->flock;
if ((priv->arbiter_count || local->transaction.eager_lock_on ||
priv->full_lock) &&
local->transaction.type == AFR_DATA_TRANSACTION) {
/*Lock entire file to avoid network split brains.*/
- int_lock->flock.l_len = 0;
- int_lock->flock.l_start = 0;
+ flock->l_len = 0;
+ flock->l_start = 0;
} else {
- int_lock->flock.l_len = local->transaction.len;
- int_lock->flock.l_start = local->transaction.start;
+ flock->l_len = local->transaction.len;
+ flock->l_start = local->transaction.start;
}
- int_lock->flock.l_type = F_WRLCK;
+ flock->l_type = F_WRLCK;
return 0;
}
@@ -2168,26 +2110,21 @@ afr_lock(call_frame_t *frame, xlator_t *this)
{
afr_internal_lock_t *int_lock = NULL;
afr_local_t *local = NULL;
+ int i = 0;
local = frame->local;
int_lock = &local->internal_lock;
+ int_lock->lock_cbk = afr_post_nonblocking_lock_cbk;
int_lock->domain = this->name;
switch (local->transaction.type) {
case AFR_DATA_TRANSACTION:
case AFR_METADATA_TRANSACTION:
- afr_set_transaction_flock(this, local);
-
- int_lock->lock_cbk = afr_post_nonblocking_inodelk_cbk;
-
- afr_nonblocking_inodelk(frame, this);
- break;
-
- case AFR_ENTRY_RENAME_TRANSACTION:
+ for (i = 0; i < int_lock->lockee_count; i++) {
+ afr_set_transaction_flock(this, local, &int_lock->lockee[i]);
+ }
- int_lock->lock_cbk = afr_post_nonblocking_entrylk_cbk;
- afr_nonblocking_entrylk(frame, this);
break;
case AFR_ENTRY_TRANSACTION:
@@ -2196,11 +2133,11 @@ afr_lock(call_frame_t *frame, xlator_t *this)
int_lock->lk_loc = &local->transaction.parent_loc;
else
GF_ASSERT(local->fd);
-
- int_lock->lock_cbk = afr_post_nonblocking_entrylk_cbk;
- afr_nonblocking_entrylk(frame, this);
+ break;
+ case AFR_ENTRY_RENAME_TRANSACTION:
break;
}
+ afr_lock_nonblocking(frame, this);
return 0;
}
@@ -2271,17 +2208,19 @@ afr_has_lock_conflict(afr_local_t *local, gf_boolean_t waitlist_check)
/* }}} */
static void
afr_copy_inodelk_vars(afr_internal_lock_t *dst, afr_internal_lock_t *src,
- xlator_t *this)
+ xlator_t *this, int lockee_num)
{
afr_private_t *priv = this->private;
+ afr_lockee_t *sl = &src->lockee[lockee_num];
+ afr_lockee_t *dl = &dst->lockee[lockee_num];
dst->domain = src->domain;
- dst->flock.l_len = src->flock.l_len;
- dst->flock.l_start = src->flock.l_start;
- dst->flock.l_type = src->flock.l_type;
- dst->lock_count = src->lock_count;
- memcpy(dst->locked_nodes, src->locked_nodes,
- priv->child_count * sizeof(*dst->locked_nodes));
+ dl->flock.l_len = sl->flock.l_len;
+ dl->flock.l_start = sl->flock.l_start;
+ dl->flock.l_type = sl->flock.l_type;
+ dl->locked_count = sl->locked_count;
+ memcpy(dl->locked_nodes, sl->locked_nodes,
+ priv->child_count * sizeof(*dl->locked_nodes));
}
void
@@ -2302,7 +2241,7 @@ __afr_transaction_wake_shared(afr_local_t *local, struct list_head *shared)
if (conflict && !list_empty(&lock->owners))
return;
afr_copy_inodelk_vars(&each->internal_lock, &local->internal_lock,
- each->transaction.frame->this);
+ each->transaction.frame->this, 0);
list_move_tail(&each->transaction.wait_list, shared);
list_add_tail(&each->transaction.owner_list, &lock->owners);
}
@@ -2337,7 +2276,7 @@ afr_internal_lock_finish(call_frame_t *frame, xlator_t *this)
} else {
lock = &local->inode_ctx->lock[local->transaction.type];
if (local->internal_lock.lock_op_ret < 0) {
- afr_handle_lock_acquire_failure(local, _gf_false);
+ afr_handle_lock_acquire_failure(local);
} else {
lock->event_generation = local->event_generation;
afr_changelog_pre_op(frame, this);
@@ -2408,8 +2347,13 @@ afr_is_delayed_changelog_post_op_needed(call_frame_t *frame, xlator_t *this,
goto out;
}
- if ((local->op != GF_FOP_WRITE) && (local->op != GF_FOP_FXATTROP)) {
- /*Only allow writes but shard does [f]xattrops on writes, so
+ if (local->transaction.disable_delayed_post_op) {
+ goto out;
+ }
+
+ if ((local->op != GF_FOP_WRITE) && (local->op != GF_FOP_FXATTROP) &&
+ (local->op != GF_FOP_FSYNC)) {
+ /*Only allow writes/fsyncs but shard does [f]xattrops on writes, so
* they are fine too*/
goto out;
}
@@ -2536,8 +2480,10 @@ afr_changelog_fsync(call_frame_t *frame, xlator_t *this)
local->call_count = call_count;
xdata = dict_new();
- if (xdata)
- ret = dict_set_int32(xdata, "batch-fsync", 1);
+ if (xdata) {
+ ret = dict_set_int32_sizen(xdata, "batch-fsync", 1);
+ ret = dict_set_str(xdata, GLUSTERFS_INTERNAL_FOP_KEY, "yes");
+ }
for (i = 0; i < priv->child_count; i++) {
if (!local->transaction.pre_op[i])
@@ -2779,7 +2725,7 @@ __afr_eager_lock_handle(afr_local_t *local, gf_boolean_t *take_lock,
*timer_local = list_entry(lock->post_op.next, afr_local_t,
transaction.owner_list);
afr_copy_inodelk_vars(&local->internal_lock,
- &(*timer_local)->internal_lock, this);
+ &(*timer_local)->internal_lock, this, 0);
lock->delay_timer = NULL;
*do_pre_op = _gf_true;
list_add_tail(&local->transaction.owner_list, &lock->owners);
@@ -2796,7 +2742,7 @@ __afr_eager_lock_handle(afr_local_t *local, gf_boolean_t *take_lock,
owner_local = list_entry(lock->owners.next, afr_local_t,
transaction.owner_list);
afr_copy_inodelk_vars(&local->internal_lock,
- &owner_local->internal_lock, this);
+ &owner_local->internal_lock, this, 0);
*take_lock = _gf_false;
*do_pre_op = _gf_true;
}
@@ -2866,6 +2812,62 @@ fail:
}
int
+afr_transaction_lockee_init(call_frame_t *frame)
+{
+ afr_local_t *local = frame->local;
+ afr_internal_lock_t *int_lock = &local->internal_lock;
+ afr_private_t *priv = frame->this->private;
+ int ret = 0;
+
+ switch (local->transaction.type) {
+ case AFR_DATA_TRANSACTION:
+ case AFR_METADATA_TRANSACTION:
+ ret = afr_add_inode_lockee(local, priv->child_count);
+ break;
+
+ case AFR_ENTRY_TRANSACTION:
+ case AFR_ENTRY_RENAME_TRANSACTION:
+ ret = afr_add_entry_lockee(local, &local->transaction.parent_loc,
+ local->transaction.basename,
+ priv->child_count);
+ if (ret) {
+ goto out;
+ }
+ if (local->op == GF_FOP_RENAME) {
+ ret = afr_add_entry_lockee(
+ local, &local->transaction.new_parent_loc,
+ local->transaction.new_basename, priv->child_count);
+ if (ret) {
+ goto out;
+ }
+
+ if (local->newloc.inode &&
+ IA_ISDIR(local->newloc.inode->ia_type)) {
+ ret = afr_add_entry_lockee(local, &local->newloc, NULL,
+ priv->child_count);
+ if (ret) {
+ goto out;
+ }
+ }
+ } else if (local->op == GF_FOP_RMDIR) {
+ ret = afr_add_entry_lockee(local, &local->loc, NULL,
+ priv->child_count);
+ if (ret) {
+ goto out;
+ }
+ }
+
+ if (int_lock->lockee_count > 1) {
+ qsort(int_lock->lockee, int_lock->lockee_count,
+ sizeof(*int_lock->lockee), afr_entry_lockee_cmp);
+ }
+ break;
+ }
+out:
+ return ret;
+}
+
+int
afr_transaction(call_frame_t *frame, xlator_t *this, afr_transaction_type type)
{
afr_local_t *local = NULL;
@@ -2879,7 +2881,7 @@ afr_transaction(call_frame_t *frame, xlator_t *this, afr_transaction_type type)
local->transaction.type = type;
- if (priv->quorum_count && !afr_has_quorum(local->child_up, this)) {
+ if (priv->quorum_count && !afr_has_quorum(local->child_up, this, NULL)) {
ret = -afr_quorum_errno(priv);
goto out;
}
@@ -2898,6 +2900,10 @@ afr_transaction(call_frame_t *frame, xlator_t *this, afr_transaction_type type)
if (ret < 0)
goto out;
+ ret = afr_transaction_lockee_init(frame);
+ if (ret)
+ goto out;
+
if (type != AFR_METADATA_TRANSACTION) {
goto txn_start;
}
diff --git a/xlators/cluster/afr/src/afr-transaction.h b/xlators/cluster/afr/src/afr-transaction.h
index 35a922544bc..beefa26f4a6 100644
--- a/xlators/cluster/afr/src/afr-transaction.h
+++ b/xlators/cluster/afr/src/afr-transaction.h
@@ -48,7 +48,7 @@ afr_pending_read_decrement(afr_private_t *priv, int child_index);
call_frame_t *
afr_transaction_detach_fop_frame(call_frame_t *frame);
gf_boolean_t
-afr_has_quorum(unsigned char *subvols, xlator_t *this);
+afr_has_quorum(unsigned char *subvols, xlator_t *this, call_frame_t *frame);
gf_boolean_t
afr_needs_changelog_update(afr_local_t *local);
void
diff --git a/xlators/cluster/afr/src/afr.c b/xlators/cluster/afr/src/afr.c
index 5d5e536ff60..df7366f0a65 100644
--- a/xlators/cluster/afr/src/afr.c
+++ b/xlators/cluster/afr/src/afr.c
@@ -87,7 +87,7 @@ static void
fix_quorum_options(xlator_t *this, afr_private_t *priv, char *qtype,
dict_t *options)
{
- if (dict_get(options, "quorum-type") == NULL) {
+ if (dict_get_sizen(options, "quorum-type") == NULL) {
/* If user doesn't configure anything enable auto-quorum if the
* replica has more than two subvolumes */
if (priv->child_count > 2)
@@ -120,23 +120,62 @@ afr_set_favorite_child_policy(afr_private_t *priv, char *policy)
return 0;
}
+
+static void
+set_data_self_heal_algorithm(afr_private_t *priv, char *algo)
+{
+ if (!algo) {
+ priv->data_self_heal_algorithm = AFR_SELFHEAL_DATA_DYNAMIC;
+ } else if (strcmp(algo, "full") == 0) {
+ priv->data_self_heal_algorithm = AFR_SELFHEAL_DATA_FULL;
+ } else if (strcmp(algo, "diff") == 0) {
+ priv->data_self_heal_algorithm = AFR_SELFHEAL_DATA_DIFF;
+ } else {
+ priv->data_self_heal_algorithm = AFR_SELFHEAL_DATA_DYNAMIC;
+ }
+}
+
+void
+afr_handle_anon_inode_options(afr_private_t *priv, dict_t *options)
+{
+ char *volfile_id_str = NULL;
+ uuid_t anon_inode_gfid = {0};
+
+ /* If the volume id is not present, don't enable anything. */
+ if (dict_get_str(options, "volume-id", &volfile_id_str))
+ return;
+ GF_ASSERT(strlen(AFR_ANON_DIR_PREFIX) + strlen(volfile_id_str) <= NAME_MAX);
+ /*anon_inode_name is not supposed to change once assigned*/
+ if (!priv->anon_inode_name[0]) {
+ snprintf(priv->anon_inode_name, sizeof(priv->anon_inode_name), "%s-%s",
+ AFR_ANON_DIR_PREFIX, volfile_id_str);
+ gf_uuid_parse(volfile_id_str, anon_inode_gfid);
+ /* Flip a bit to make sure volfile-id and anon-gfid are not the same. */
+ anon_inode_gfid[0] ^= 1;
+ uuid_utoa_r(anon_inode_gfid, priv->anon_gfid_str);
+ }
+}
+
int
reconfigure(xlator_t *this, dict_t *options)
{
afr_private_t *priv = NULL;
xlator_t *read_subvol = NULL;
int read_subvol_index = -1;
+ int timeout_old = 0;
int ret = -1;
int index = -1;
char *qtype = NULL;
char *fav_child_policy = NULL;
+ char *data_self_heal = NULL;
+ char *data_self_heal_algorithm = NULL;
+ char *locking_scheme = NULL;
gf_boolean_t consistent_io = _gf_false;
gf_boolean_t choose_local_old = _gf_false;
+ gf_boolean_t enabled_old = _gf_false;
priv = this->private;
- GF_OPTION_RECONF("afr-dirty-xattr", priv->afr_dirty, options, str, out);
-
GF_OPTION_RECONF("metadata-splitbrain-forced-heal",
priv->metadata_splitbrain_forced_heal, options, bool, out);
@@ -149,7 +188,9 @@ reconfigure(xlator_t *this, dict_t *options)
GF_OPTION_RECONF("metadata-self-heal", priv->metadata_self_heal, options,
bool, out);
- GF_OPTION_RECONF("data-self-heal", priv->data_self_heal, options, str, out);
+ GF_OPTION_RECONF("data-self-heal", data_self_heal, options, str, out);
+ if (gf_string2boolean(data_self_heal, &priv->data_self_heal) == -1)
+ goto out;
GF_OPTION_RECONF("entry-self-heal", priv->entry_self_heal, options, bool,
out);
@@ -157,8 +198,9 @@ reconfigure(xlator_t *this, dict_t *options)
GF_OPTION_RECONF("data-self-heal-window-size",
priv->data_self_heal_window_size, options, uint32, out);
- GF_OPTION_RECONF("data-self-heal-algorithm", priv->data_self_heal_algorithm,
+ GF_OPTION_RECONF("data-self-heal-algorithm", data_self_heal_algorithm,
options, str, out);
+ set_data_self_heal_algorithm(priv, data_self_heal_algorithm);
GF_OPTION_RECONF("halo-enabled", priv->halo_enabled, options, bool, out);
@@ -214,16 +256,19 @@ reconfigure(xlator_t *this, dict_t *options)
}
GF_OPTION_RECONF("pre-op-compat", priv->pre_op_compat, options, bool, out);
- GF_OPTION_RECONF("locking-scheme", priv->locking_scheme, options, str, out);
+ GF_OPTION_RECONF("locking-scheme", locking_scheme, options, str, out);
+ priv->granular_locks = (strcmp(locking_scheme, "granular") == 0);
GF_OPTION_RECONF("full-lock", priv->full_lock, options, bool, out);
GF_OPTION_RECONF("granular-entry-heal", priv->esh_granular, options, bool,
out);
GF_OPTION_RECONF("eager-lock", priv->eager_lock, options, bool, out);
+ GF_OPTION_RECONF("optimistic-change-log", priv->optimistic_change_log,
+ options, bool, out);
GF_OPTION_RECONF("quorum-type", qtype, options, str, out);
GF_OPTION_RECONF("quorum-count", priv->quorum_count, options, uint32, out);
fix_quorum_options(this, priv, qtype, options);
- if (priv->quorum_count && !afr_has_quorum(priv->child_up, this))
+ if (priv->quorum_count && !afr_has_quorum(priv->child_up, this, NULL))
gf_msg(this->name, GF_LOG_WARNING, 0, AFR_MSG_QUORUM_FAIL,
"Client-quorum is not met");
@@ -236,11 +281,13 @@ reconfigure(xlator_t *this, dict_t *options)
GF_OPTION_RECONF("ensure-durability", priv->ensure_durability, options,
bool, out);
+ enabled_old = priv->shd.enabled;
GF_OPTION_RECONF("self-heal-daemon", priv->shd.enabled, options, bool, out);
GF_OPTION_RECONF("iam-self-heal-daemon", priv->shd.iamshd, options, bool,
out);
+ timeout_old = priv->shd.timeout;
GF_OPTION_RECONF("heal-timeout", priv->shd.timeout, options, int32, out);
GF_OPTION_RECONF("consistent-metadata", priv->consistent_metadata, options,
@@ -264,6 +311,16 @@ reconfigure(xlator_t *this, dict_t *options)
consistent_io = _gf_false;
priv->consistent_io = consistent_io;
+ afr_handle_anon_inode_options(priv, options);
+
+ GF_OPTION_RECONF("use-anonymous-inode", priv->use_anon_inode, options, bool,
+ out);
+ if (priv->shd.enabled) {
+ if ((priv->shd.enabled != enabled_old) ||
+ (timeout_old != priv->shd.timeout))
+ afr_selfheal_childup(this, priv);
+ }
+
ret = 0;
out:
return ret;
@@ -349,7 +406,7 @@ afr_ta_init(afr_private_t *priv)
priv->release_ta_notify_dom_lock = _gf_false;
INIT_LIST_HEAD(&priv->ta_waitq);
INIT_LIST_HEAD(&priv->ta_onwireq);
- *priv->ta_gfid = 0;
+ gf_uuid_clear(priv->ta_gfid);
}
int32_t
@@ -366,6 +423,9 @@ init(xlator_t *this)
char *qtype = NULL;
char *fav_child_policy = NULL;
char *thin_arbiter = NULL;
+ char *data_self_heal = NULL;
+ char *locking_scheme = NULL;
+ char *data_self_heal_algorithm = NULL;
if (!this->children) {
gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_CHILD_MISCONFIGURED,
@@ -385,6 +445,8 @@ init(xlator_t *this)
goto out;
priv = this->private;
+ INIT_LIST_HEAD(&priv->saved_locks);
+ INIT_LIST_HEAD(&priv->lk_healq);
LOCK_INIT(&priv->lock);
child_count = xlator_subvolume_count(this);
@@ -448,10 +510,13 @@ init(xlator_t *this)
GF_OPTION_INIT("heal-wait-queue-length", priv->heal_wait_qlen, uint32, out);
- GF_OPTION_INIT("data-self-heal", priv->data_self_heal, str, out);
+ GF_OPTION_INIT("data-self-heal", data_self_heal, str, out);
+ if (gf_string2boolean(data_self_heal, &priv->data_self_heal) == -1)
+ goto out;
- GF_OPTION_INIT("data-self-heal-algorithm", priv->data_self_heal_algorithm,
- str, out);
+ GF_OPTION_INIT("data-self-heal-algorithm", data_self_heal_algorithm, str,
+ out);
+ set_data_self_heal_algorithm(priv, data_self_heal_algorithm);
GF_OPTION_INIT("data-self-heal-window-size",
priv->data_self_heal_window_size, uint32, out);
@@ -479,7 +544,8 @@ init(xlator_t *this)
out);
GF_OPTION_INIT("pre-op-compat", priv->pre_op_compat, bool, out);
- GF_OPTION_INIT("locking-scheme", priv->locking_scheme, str, out);
+ GF_OPTION_INIT("locking-scheme", locking_scheme, str, out);
+ priv->granular_locks = (strcmp(locking_scheme, "granular") == 0);
GF_OPTION_INIT("full-lock", priv->full_lock, bool, out);
GF_OPTION_INIT("granular-entry-heal", priv->esh_granular, bool, out);
@@ -500,7 +566,9 @@ init(xlator_t *this)
GF_OPTION_INIT("consistent-metadata", priv->consistent_metadata, bool, out);
GF_OPTION_INIT("consistent-io", priv->consistent_io, bool, out);
+ afr_handle_anon_inode_options(priv, this->options);
+ GF_OPTION_INIT("use-anonymous-inode", priv->use_anon_inode, bool, out);
if (priv->quorum_count != 0)
priv->consistent_io = _gf_false;
@@ -512,13 +580,19 @@ init(xlator_t *this)
goto out;
}
+ priv->anon_inode = GF_CALLOC(sizeof(unsigned char), child_count,
+ gf_afr_mt_char);
+
priv->child_up = GF_CALLOC(sizeof(unsigned char), child_count,
gf_afr_mt_char);
priv->child_latency = GF_MALLOC(sizeof(*priv->child_latency) * child_count,
gf_afr_mt_child_latency_t);
+ priv->halo_child_up = GF_CALLOC(sizeof(unsigned char), child_count,
+ gf_afr_mt_char);
- if (!priv->child_up || !priv->child_latency) {
+ if (!priv->child_up || !priv->child_latency || !priv->halo_child_up ||
+ !priv->anon_inode) {
ret = -ENOMEM;
goto out;
}
@@ -559,12 +633,20 @@ init(xlator_t *this)
goto out;
}
- ret = afr_selfheal_daemon_init(this);
- if (ret) {
+ this->itable = inode_table_new(SHD_INODE_LRU_LIMIT, this);
+ if (!this->itable) {
ret = -ENOMEM;
goto out;
}
+ if (priv->shd.iamshd) {
+ ret = afr_selfheal_daemon_init(this);
+ if (ret) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ }
+
/* keep more local here as we may need them for self-heal etc */
this->local_pool = mem_pool_new(afr_local_t, 512);
if (!this->local_pool) {
@@ -578,24 +660,91 @@ init(xlator_t *this)
out:
return ret;
}
+void
+afr_destroy_healer_object(xlator_t *this, struct subvol_healer *healer)
+{
+ int ret = -1;
-int
+ if (!healer)
+ return;
+
+ if (healer->running) {
+ /*
+ * If there are any resources to clean up, we need
+ * to do that gracefully using pthread_cleanup_push.
+ */
+ ret = gf_thread_cleanup_xint(healer->thread);
+ if (ret)
+ gf_msg(this->name, GF_LOG_WARNING, 0, AFR_MSG_SELF_HEAL_FAILED,
+ "Failed to clean up healer threads.");
+ healer->thread = 0;
+ }
+ pthread_cond_destroy(&healer->cond);
+ pthread_mutex_destroy(&healer->mutex);
+}
+
+void
+afr_selfheal_daemon_fini(xlator_t *this)
+{
+ struct subvol_healer *healer = NULL;
+ afr_self_heald_t *shd = NULL;
+ afr_private_t *priv = NULL;
+ int i = 0;
+
+ priv = this->private;
+ if (!priv)
+ return;
+
+ shd = &priv->shd;
+ if (!shd->iamshd)
+ return;
+
+ for (i = 0; i < priv->child_count; i++) {
+ healer = &shd->index_healers[i];
+ afr_destroy_healer_object(this, healer);
+
+ healer = &shd->full_healers[i];
+ afr_destroy_healer_object(this, healer);
+
+ if (shd->statistics[i])
+ eh_destroy(shd->statistics[i]);
+ }
+ GF_FREE(shd->index_healers);
+ GF_FREE(shd->full_healers);
+ GF_FREE(shd->statistics);
+ if (shd->split_brain)
+ eh_destroy(shd->split_brain);
+}
+void
fini(xlator_t *this)
{
afr_private_t *priv = NULL;
priv = this->private;
+
+ afr_selfheal_daemon_fini(this);
+ GF_ASSERT(list_empty(&priv->saved_locks));
+
LOCK(&priv->lock);
if (priv->timer != NULL) {
gf_timer_call_cancel(this->ctx, priv->timer);
priv->timer = NULL;
}
UNLOCK(&priv->lock);
+
+ if (this->local_pool != NULL) {
+ mem_pool_destroy(this->local_pool);
+ this->local_pool = NULL;
+ }
+
this->private = NULL;
afr_priv_destroy(priv);
- // if (this->itable);//I don't see any destroy func
+ if (this->itable) {
+ inode_table_destroy(this->itable);
+ this->itable = NULL;
+ }
- return 0;
+ return;
}
struct xlator_fops fops = {
@@ -619,6 +768,7 @@ struct xlator_fops fops = {
.getxattr = afr_getxattr,
.fgetxattr = afr_fgetxattr,
.readv = afr_readv,
+ .seek = afr_seek,
/* inode write */
.writev = afr_writev,
@@ -691,7 +841,7 @@ struct volume_options options[] = {
{.key = {"read-hash-mode"},
.type = GF_OPTION_TYPE_INT,
.min = 0,
- .max = 3,
+ .max = 5,
.default_value = "1",
.op_version = {2},
.flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
@@ -704,7 +854,10 @@ struct volume_options options[] = {
"1 = hash by GFID of file (all clients use "
"same subvolume).\n"
"2 = hash by GFID of file and client PID.\n"
- "3 = brick having the least outstanding read requests."},
+ "3 = brick having the least outstanding read requests.\n"
+ "4 = brick having the least network ping latency.\n"
+ "5 = Hybrid mode between 3 and 4, ie least value among "
+ "network-latency multiplied by outstanding-read-requests."},
{
.key = {"choose-local"},
.type = GF_OPTION_TYPE_BOOL,
@@ -797,7 +950,7 @@ struct volume_options options[] = {
.type = GF_OPTION_TYPE_STR,
.value = {"1", "on", "yes", "true", "enable", "0", "off", "no", "false",
"disable", "open"},
- .default_value = "on",
+ .default_value = "off",
.op_version = {1},
.flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
.tags = {"replicate"},
@@ -834,7 +987,7 @@ struct volume_options options[] = {
"process would be applied simultaneously."},
{.key = {"metadata-self-heal"},
.type = GF_OPTION_TYPE_BOOL,
- .default_value = "on",
+ .default_value = "off",
.op_version = {1},
.flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
.tags = {"replicate"},
@@ -844,7 +997,7 @@ struct volume_options options[] = {
"the file/directory."},
{.key = {"entry-self-heal"},
.type = GF_OPTION_TYPE_BOOL,
- .default_value = "on",
+ .default_value = "off",
.op_version = {1},
.flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
.tags = {"replicate"},
@@ -1164,5 +1317,28 @@ struct volume_options options[] = {
.tags = {"replicate"},
.description = "This option exists only for backward compatibility "
"and configuring it doesn't have any effect"},
+ {.key = {"use-anonymous-inode"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "no",
+ .op_version = {GD_OP_VERSION_8_0},
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE,
+ .tags = {"replicate"},
+ .description = "Setting this option heals directory renames efficiently"},
+
{.key = {NULL}},
};
+
+xlator_api_t xlator_api = {
+ .init = init,
+ .fini = fini,
+ .notify = notify,
+ .reconfigure = reconfigure,
+ .mem_acct_init = mem_acct_init,
+ .op_version = {1}, /* Present from the initial version */
+ .dumpops = &dumpops,
+ .fops = &fops,
+ .cbks = &cbks,
+ .options = options,
+ .identifier = "replicate",
+ .category = GF_MAINTAINED,
+};
diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h
index 6f8015380f0..d62f9a9caf2 100644
--- a/xlators/cluster/afr/src/afr.h
+++ b/xlators/cluster/afr/src/afr.h
@@ -11,17 +11,18 @@
#ifndef __AFR_H__
#define __AFR_H__
-#include "call-stub.h"
-#include "compat-errno.h"
+#include <glusterfs/call-stub.h>
+#include <glusterfs/compat-errno.h>
#include "afr-mem-types.h"
#include "libxlator.h"
-#include "timer.h"
-#include "syncop.h"
+#include <glusterfs/timer.h>
+#include <glusterfs/syncop.h>
#include "afr-self-heald.h"
#include "afr-messages.h"
+#define SHD_INODE_LRU_LIMIT 1
#define AFR_PATHINFO_HEADER "REPLICATE:"
#define AFR_SH_READDIR_SIZE_KEY "self-heal-readdir-size"
#define AFR_SH_DATA_DOMAIN_FMT "%s:self-heal"
@@ -38,7 +39,10 @@
#define AFR_TA_DOM_NOTIFY "afr.ta.dom-notify"
#define AFR_TA_DOM_MODIFY "afr.ta.dom-modify"
+#define AFR_LK_HEAL_DOM "afr.lock-heal.domain"
+
#define AFR_HALO_MAX_LATENCY 99999
+#define AFR_ANON_DIR_PREFIX ".glusterfs-anonymous-inode"
#define PFLAG_PENDING (1 << 0)
#define PFLAG_SBRAIN (1 << 1)
@@ -94,6 +98,25 @@ typedef int (*afr_changelog_resume_t)(call_frame_t *frame, xlator_t *this);
gf_fop_list[local->op], uuid_utoa(local->inode->gfid)); \
} while (0)
+#define AFR_ERROR_OUT_IF_FDCTX_INVALID(__fd, __this, __error, __label) \
+ do { \
+ afr_fd_ctx_t *__fd_ctx = NULL; \
+ __fd_ctx = afr_fd_ctx_get(__fd, __this); \
+ if (__fd_ctx && __fd_ctx->is_fd_bad) { \
+ __error = EBADF; \
+ goto __label; \
+ } \
+ } while (0)
+
+typedef enum {
+ AFR_READ_POLICY_FIRST_UP,
+ AFR_READ_POLICY_GFID_HASH,
+ AFR_READ_POLICY_GFID_PID_HASH,
+ AFR_READ_POLICY_LESS_LOAD,
+ AFR_READ_POLICY_LEAST_LATENCY,
+ AFR_READ_POLICY_LOAD_LATENCY_HYBRID,
+} afr_read_hash_mode_t;
+
typedef enum {
AFR_FAV_CHILD_NONE,
AFR_FAV_CHILD_BY_SIZE,
@@ -104,6 +127,12 @@ typedef enum {
} afr_favorite_child_policy;
typedef enum {
+ AFR_SELFHEAL_DATA_FULL = 0,
+ AFR_SELFHEAL_DATA_DIFF,
+ AFR_SELFHEAL_DATA_DYNAMIC,
+} afr_data_self_heal_type_t;
+
+typedef enum {
AFR_CHILD_UNKNOWN = -1,
AFR_CHILD_ZERO,
AFR_CHILD_ONE,
@@ -119,13 +148,27 @@ typedef enum {
*on BAD brick - Success*/
TA_INFO_IN_MEMORY_FAILED, /*Bad brick info is in memory and fop failed
*on GOOD brick - Failed*/
+ TA_SUCCESS, /*FOP succeeded on both data bricks.*/
} afr_ta_fop_state_t;
struct afr_nfsd {
- gf_boolean_t iamnfsd;
uint32_t halo_max_latency_msec;
+ gf_boolean_t iamnfsd;
};
+typedef struct _afr_lk_heal_info {
+ fd_t *fd;
+ int32_t cmd;
+ struct gf_flock flock;
+ dict_t *xdata_req;
+ unsigned char *locked_nodes;
+ struct list_head pos;
+ gf_lkowner_t lk_owner;
+ pid_t pid;
+ int32_t *child_up_event_gen;
+ int32_t *child_down_event_gen;
+} afr_lk_heal_info_t;
+
typedef struct _afr_private {
gf_lock_t lock; /* to guard access to child_count, etc */
unsigned int child_count; /* total number of children */
@@ -136,26 +179,27 @@ typedef struct _afr_private {
inode_t *root_inode;
+ int favorite_child; /* subvolume to be preferred in resolving
+ split-brain cases */
/* For thin-arbiter. */
- unsigned int thin_arbiter_count; /* 0 or 1 at the moment.*/
uuid_t ta_gfid;
- unsigned char ta_child_up;
+ unsigned int thin_arbiter_count; /* 0 or 1 at the moment.*/
int ta_bad_child_index;
- off_t ta_notify_dom_lock_offset;
- gf_boolean_t release_ta_notify_dom_lock;
+ int ta_event_gen;
unsigned int ta_in_mem_txn_count;
unsigned int ta_on_wire_txn_count;
struct list_head ta_waitq;
struct list_head ta_onwireq;
+ unsigned char *anon_inode;
unsigned char *child_up;
+ unsigned char *halo_child_up;
int64_t *child_latency;
unsigned char *local;
char **pending_key;
- char *data_self_heal; /* on/off/open */
- char *data_self_heal_algorithm; /* name of algorithm */
+ afr_data_self_heal_type_t data_self_heal_algorithm;
unsigned int data_self_heal_window_size; /* max number of pipelined
read/writes */
@@ -170,30 +214,31 @@ typedef struct _afr_private {
int32_t healers; /* No. of elements currently undergoing background
heal*/
+ gf_boolean_t release_ta_notify_dom_lock;
+
gf_boolean_t metadata_self_heal; /* on/off */
gf_boolean_t entry_self_heal; /* on/off */
gf_boolean_t metadata_splitbrain_forced_heal; /* on/off */
int read_child; /* read-subvolume */
- unsigned int hash_mode; /* for when read_child is not set */
gf_atomic_t *pending_reads; /*No. of pending read cbks per child.*/
- int favorite_child; /* subvolume to be preferred in resolving
- split-brain cases */
- afr_favorite_child_policy fav_child_policy; /*Policy to use for automatic
- resolution of split-brains.*/
+ gf_timer_t *timer; /* launched when parent up is received */
unsigned int wait_count; /* # of servers to wait for success */
- gf_timer_t *timer; /* launched when parent up is received */
-
+ unsigned char ta_child_up;
gf_boolean_t optimistic_change_log;
gf_boolean_t eager_lock;
gf_boolean_t pre_op_compat; /* on/off */
uint32_t post_op_delay_secs;
unsigned int quorum_count;
- char vol_uuid[UUID_SIZE + 1];
+ off_t ta_notify_dom_lock_offset;
+ afr_favorite_child_policy fav_child_policy; /*Policy to use for automatic
+ resolution of split-brains.*/
+ afr_read_hash_mode_t hash_mode; /* for when read_child is not set */
+
int32_t *last_event;
/* @event_generation: Keeps count of number of events received which can
@@ -206,33 +251,41 @@ typedef struct _afr_private {
important as we might have had a network split brain.
*/
uint32_t event_generation;
+ char vol_uuid[UUID_SIZE + 1];
gf_boolean_t choose_local;
gf_boolean_t did_discovery;
- uint64_t sh_readdir_size;
gf_boolean_t ensure_durability;
+ gf_boolean_t halo_enabled;
+ gf_boolean_t consistent_metadata;
+ gf_boolean_t need_heal;
+ gf_boolean_t granular_locks;
+ uint64_t sh_readdir_size;
char *sh_domain;
char *afr_dirty;
- gf_boolean_t halo_enabled;
- uint32_t halo_max_latency_msec;
- uint32_t halo_max_replicas;
- uint32_t halo_min_replicas;
+ uint64_t spb_choice_timeout;
afr_self_heald_t shd;
struct afr_nfsd nfsd;
- gf_boolean_t consistent_metadata;
- uint64_t spb_choice_timeout;
- gf_boolean_t need_heal;
+ uint32_t halo_max_latency_msec;
+ uint32_t halo_max_replicas;
+ uint32_t halo_min_replicas;
- /* pump dependencies */
- void *pump_private;
- gf_boolean_t use_afr_in_pump;
- char *locking_scheme;
gf_boolean_t full_lock;
gf_boolean_t esh_granular;
gf_boolean_t consistent_io;
+ gf_boolean_t data_self_heal; /* on/off */
+ gf_boolean_t use_anon_inode;
+
+ /*For lock healing.*/
+ struct list_head saved_locks;
+ struct list_head lk_healq;
+
+ /*For anon-inode handling */
+ char anon_inode_name[NAME_MAX + 1];
+ char anon_gfid_str[UUID_SIZE + 1];
} afr_private_t;
typedef enum {
@@ -281,12 +334,14 @@ afr_index_from_ia_type(ia_type_t type)
}
typedef struct {
+ struct gf_flock flock;
loc_t loc;
+ fd_t *fd;
char *basename;
unsigned char *locked_nodes;
int locked_count;
-} afr_entry_lockee_t;
+} afr_lockee_t;
int
afr_entry_lockee_cmp(const void *l1, const void *l2);
@@ -294,21 +349,17 @@ afr_entry_lockee_cmp(const void *l1, const void *l2);
typedef struct {
loc_t *lk_loc;
- int lockee_count;
- afr_entry_lockee_t lockee[AFR_LOCKEE_COUNT_MAX];
+ afr_lockee_t lockee[AFR_LOCKEE_COUNT_MAX];
- struct gf_flock flock;
const char *lk_basename;
const char *lower_basename;
const char *higher_basename;
- char lower_locked;
- char higher_locked;
- unsigned char *locked_nodes;
unsigned char *lower_locked_nodes;
- int32_t lock_count;
- int32_t entrylk_lock_count;
+ afr_lock_cbk_t lock_cbk;
+
+ int lockee_count;
int32_t lk_call_count;
int32_t lk_expected_count;
@@ -316,14 +367,15 @@ typedef struct {
int32_t lock_op_ret;
int32_t lock_op_errno;
- afr_lock_cbk_t lock_cbk;
char *domain; /* Domain on which inode/entry lock/unlock in progress.*/
+ int32_t lock_count;
+ char lower_locked;
+ char higher_locked;
} afr_internal_lock_t;
struct afr_reply {
int valid;
int32_t op_ret;
- int32_t op_errno;
dict_t *xattr; /*For xattrop*/
dict_t *xdata;
struct iatt poststat;
@@ -332,6 +384,7 @@ struct afr_reply {
struct iatt preparent;
struct iatt preparent2;
struct iatt postparent2;
+ int32_t op_errno;
/* For rchecksum */
uint8_t checksum[SHA256_DIGEST_LENGTH];
gf_boolean_t buf_has_zeroes;
@@ -355,6 +408,10 @@ typedef struct {
arrives, we continue to read off this subvol.
*/
int readdir_subvol;
+ /* lock-healing related members. */
+ gf_boolean_t is_fd_bad;
+ afr_lk_heal_info_t *lk_heal_info;
+
} afr_fd_ctx_t;
typedef enum {
@@ -371,8 +428,6 @@ typedef struct _afr_inode_lock_t {
*/
int32_t num_inodelks;
unsigned int event_generation;
- gf_boolean_t release;
- gf_boolean_t acquired;
gf_timer_t *delay_timer;
struct list_head owners; /*Transactions that are performing fop*/
struct list_head post_op; /*Transactions that are done with the fop
@@ -381,6 +436,8 @@ typedef struct _afr_inode_lock_t {
*conflicting transactions to complete*/
struct list_head frozen; /*Transactions that need to go as part of
* next batch of eager-lock*/
+ gf_boolean_t release;
+ gf_boolean_t acquired;
} afr_lock_t;
typedef struct _afr_inode_ctx {
@@ -389,15 +446,11 @@ typedef struct _afr_inode_ctx {
int lock_count;
int spb_choice;
gf_timer_t *timer;
- gf_boolean_t need_refresh;
unsigned int *pre_op_done[AFR_NUM_CHANGE_LOGS];
int inherited[AFR_NUM_CHANGE_LOGS];
int on_disk[AFR_NUM_CHANGE_LOGS];
-
- /* set if any write on this fd was a non stable write
- (i.e, without O_SYNC or O_DSYNC)
- */
- gf_boolean_t witnessed_unstable_write;
+ /*Only 2 types of transactions support eager-locks now. DATA/METADATA*/
+ afr_lock_t lock[2];
/* @open_fd_count:
Number of open FDs queried from the server, as queried through
@@ -405,8 +458,12 @@ typedef struct _afr_inode_ctx {
temporarily disabled.
*/
uint32_t open_fd_count;
- /*Only 2 types of transactions support eager-locks now. DATA/METADATA*/
- afr_lock_t lock[2];
+ gf_boolean_t need_refresh;
+
+ /* set if any write on this fd was a non stable write
+ (i.e, without O_SYNC or O_DSYNC)
+ */
+ gf_boolean_t witnessed_unstable_write;
} afr_inode_ctx_t;
typedef struct _afr_local {
@@ -420,19 +477,15 @@ typedef struct _afr_local {
unsigned int event_generation;
uint32_t open_fd_count;
- gf_boolean_t update_open_fd_count;
int32_t num_inodelks;
- gf_boolean_t update_num_inodelks;
-
- gf_lkowner_t saved_lk_owner;
int32_t op_ret;
int32_t op_errno;
- int32_t **pending;
-
int dirty[AFR_NUM_CHANGE_LOGS];
+ int32_t **pending;
+
loc_t loc;
loc_t newloc;
@@ -463,14 +516,6 @@ typedef struct _afr_local {
afr_read_txn_wind_t readfn;
- /* @refreshed:
-
- the inode was "refreshed" (i.e, pending xattrs from all subvols
- freshly inspected and inode ctx updated accordingly) as part of
- this transaction already.
- */
- gf_boolean_t refreshed;
-
/* @inode:
the inode on which the read txn is performed on. ref'ed and copied
@@ -495,8 +540,6 @@ typedef struct _afr_local {
unsigned char *readable;
unsigned char *readable2; /*For rename transaction*/
- int read_subvol; /* Current read subvolume */
-
afr_inode_refresh_cbk_t refreshfn;
/* @refreshinode:
@@ -505,9 +548,30 @@ typedef struct _afr_local {
*/
inode_t *refreshinode;
+ dict_t *xattr_req;
+
+ dict_t *dict;
+
+ int read_subvol; /* Current read subvolume */
+
+ int optimistic_change_log;
+
+ afr_internal_lock_t internal_lock;
+
/*To handle setattr/setxattr on yet to be linked inode from dht*/
uuid_t refreshgfid;
+ /* @refreshed:
+
+ the inode was "refreshed" (i.e, pending xattrs from all subvols
+ freshly inspected and inode ctx updated accordingly) as part of
+ this transaction already.
+ */
+ gf_boolean_t refreshed;
+
+ gf_boolean_t update_num_inodelks;
+ gf_boolean_t update_open_fd_count;
+
/*
@pre_op_compat:
@@ -517,14 +581,6 @@ typedef struct _afr_local {
gf_boolean_t pre_op_compat;
- dict_t *xattr_req;
-
- afr_internal_lock_t internal_lock;
-
- dict_t *dict;
-
- int optimistic_change_log;
-
/* Is the current writev() going to perform a stable write?
i.e, is fd->flags or @flags writev param have O_SYNC or
O_DSYNC?
@@ -543,25 +599,25 @@ typedef struct _afr_local {
struct {
struct {
- gf_boolean_t needs_fresh_lookup;
- uuid_t gfid_req;
- } lookup;
-
- struct {
- unsigned char buf_set;
struct statvfs buf;
+ unsigned char buf_set;
} statfs;
struct {
- int32_t flags;
fd_t *fd;
+ int32_t flags;
} open;
struct {
- int32_t cmd;
struct gf_flock user_flock;
struct gf_flock ret_flock;
unsigned char *locked_nodes;
+ int32_t cmd;
+ /*For lock healing only.*/
+ unsigned char *dom_locked_nodes;
+ int32_t *dom_lock_op_ret;
+ int32_t *dom_lock_op_errno;
+ struct gf_flock *getlk_rsp;
} lk;
/* inode read */
@@ -586,8 +642,8 @@ typedef struct _afr_local {
struct {
char *name;
- int last_index;
long xattr_len;
+ int last_index;
} getxattr;
struct {
@@ -600,11 +656,10 @@ typedef struct _afr_local {
/* dir read */
struct {
+ uint32_t *checksum;
int success_count;
int32_t op_ret;
int32_t op_errno;
-
- uint32_t *checksum;
} opendir;
struct {
@@ -613,8 +668,8 @@ typedef struct _afr_local {
size_t size;
off_t offset;
dict_t *dict;
- gf_boolean_t failed;
int last_index;
+ gf_boolean_t failed;
} readdir;
/* inode write */
@@ -624,12 +679,11 @@ typedef struct _afr_local {
} inode_wfop; // common structure for all inode-write-fops
struct {
- int32_t op_ret;
-
struct iovec *vector;
struct iobref *iobref;
- int32_t count;
off_t offset;
+ int32_t op_ret;
+ int32_t count;
uint32_t flags;
} writev;
@@ -689,29 +743,25 @@ typedef struct _afr_local {
} create;
struct {
+ dict_t *params;
dev_t dev;
mode_t mode;
- dict_t *params;
} mknod;
struct {
- int32_t mode;
dict_t *params;
+ int32_t mode;
} mkdir;
struct {
- int flags;
- } rmdir;
-
- struct {
dict_t *params;
char *linkpath;
} symlink;
struct {
- int32_t mode;
off_t offset;
size_t len;
+ int32_t mode;
} fallocate;
struct {
@@ -738,10 +788,10 @@ typedef struct _afr_local {
struct {
char *volume;
char *basename;
+ void *xdata;
entrylk_cmd in_cmd;
entrylk_cmd cmd;
entrylk_type type;
- void *xdata;
} entrylk;
struct {
@@ -750,31 +800,33 @@ typedef struct _afr_local {
} seek;
struct {
- int32_t datasync;
- } fsync;
-
- struct {
struct gf_lease user_lease;
struct gf_lease ret_lease;
unsigned char *locked_nodes;
} lease;
- } cont;
+ struct {
+ int flags;
+ } rmdir;
- struct {
- off_t start, len;
+ struct {
+ int32_t datasync;
+ } fsync;
- gf_boolean_t eager_lock_on;
- gf_boolean_t do_eager_unlock;
+ struct {
+ uuid_t gfid_req;
+ gf_boolean_t needs_fresh_lookup;
+ } lookup;
+
+ } cont;
+ struct {
char *basename;
char *new_basename;
loc_t parent_loc;
loc_t new_parent_loc;
- afr_transaction_type type;
-
/* stub to resume on destruction
of the transaction frame */
call_stub_t *resume_stub;
@@ -792,6 +844,30 @@ typedef struct _afr_local {
FOP failed. */
unsigned char *failed_subvols;
+ call_frame_t *main_frame; /*Fop frame*/
+ call_frame_t *frame; /*Transaction frame*/
+
+ int (*wind)(call_frame_t *frame, xlator_t *this, int subvol);
+
+ int (*unwind)(call_frame_t *frame, xlator_t *this);
+
+ off_t start, len;
+
+ afr_transaction_type type;
+
+ int32_t in_flight_sb_errno; /* This is where the cause of the
+ failure on the last good copy of
+ the file is stored.
+ */
+
+ /* @changelog_resume: function to be called after changlogging
+ (either pre-op or post-op) is done
+ */
+ afr_changelog_resume_t changelog_resume;
+
+ gf_boolean_t eager_lock_on;
+ gf_boolean_t do_eager_unlock;
+
/* @dirtied: flag which indicates whether we set dirty flag
in the OP. Typically true when we are performing operation
on more than one subvol and optimistic changelog is disabled
@@ -816,6 +892,10 @@ typedef struct _afr_local {
*/
gf_boolean_t no_uninherit;
+ gf_boolean_t in_flight_sb; /* Indicator for occurrence of
+ split-brain while in the middle of
+ a txn. */
+
/* @uninherit_done:
@uninherit_value:
@@ -828,27 +908,7 @@ typedef struct _afr_local {
gf_boolean_t uninherit_done;
gf_boolean_t uninherit_value;
- gf_boolean_t in_flight_sb; /* Indicator for occurrence of
- split-brain while in the middle of
- a txn. */
- int32_t in_flight_sb_errno; /* This is where the cause of the
- failure on the last good copy of
- the file is stored.
- */
-
- /* @changelog_resume: function to be called after changlogging
- (either pre-op or post-op) is done
- */
- afr_changelog_resume_t changelog_resume;
-
- call_frame_t *main_frame; /*Fop frame*/
- call_frame_t *frame; /*Transaction frame*/
-
- int (*wind)(call_frame_t *frame, xlator_t *this, int subvol);
-
- int (*unwind)(call_frame_t *frame, xlator_t *this);
-
- /* post-op hook */
+ gf_boolean_t disable_delayed_post_op;
} transaction;
syncbarrier_t barrier;
@@ -861,33 +921,36 @@ typedef struct _afr_local {
mode_t umask;
int xflag;
- gf_boolean_t do_discovery;
struct afr_reply *replies;
/* For client side background heals. */
struct list_head healer;
call_frame_t *heal_frame;
- gf_boolean_t need_full_crawl;
- afr_fop_lock_state_t fop_lock_state;
-
- gf_boolean_t is_read_txn;
afr_inode_ctx_t *inode_ctx;
/*For thin-arbiter transactions.*/
- unsigned char ta_child_up;
+ int ta_failed_subvol;
+ int ta_event_gen;
struct list_head ta_waitq;
struct list_head ta_onwireq;
afr_ta_fop_state_t fop_state;
- int ta_failed_subvol;
+ afr_fop_lock_state_t fop_lock_state;
+ gf_lkowner_t saved_lk_owner;
+ unsigned char read_txn_query_child;
+ unsigned char ta_child_up;
+ gf_boolean_t do_discovery;
+ gf_boolean_t need_full_crawl;
+ gf_boolean_t is_read_txn;
+ gf_boolean_t is_new_entry;
} afr_local_t;
typedef struct afr_spbc_timeout {
call_frame_t *frame;
- gf_boolean_t d_spb;
- gf_boolean_t m_spb;
loc_t *loc;
int spb_child_index;
+ gf_boolean_t d_spb;
+ gf_boolean_t m_spb;
} afr_spbc_timeout_t;
typedef struct afr_spb_status {
@@ -897,9 +960,9 @@ typedef struct afr_spb_status {
typedef struct afr_empty_brick_args {
call_frame_t *frame;
+ char *op_type;
loc_t loc;
int empty_index;
- char *op_type;
} afr_empty_brick_args_t;
typedef struct afr_read_subvol_args {
@@ -941,7 +1004,10 @@ afr_inode_read_subvol_set(inode_t *inode, xlator_t *this,
int event_generation);
int
-afr_inode_event_gen_reset(inode_t *inode, xlator_t *this);
+__afr_inode_need_refresh_set(inode_t *inode, xlator_t *this);
+
+int
+afr_inode_need_refresh_set(inode_t *inode, xlator_t *this);
int
afr_read_subvol_select_by_policy(inode_t *inode, xlator_t *this,
@@ -973,11 +1039,14 @@ int
xattr_is_equal(dict_t *this, char *key1, data_t *value1, void *data);
int
-afr_init_entry_lockee(afr_entry_lockee_t *lockee, afr_local_t *local,
- loc_t *loc, char *basename, int child_count);
+afr_add_entry_lockee(afr_local_t *local, loc_t *loc, char *basename,
+ int child_count);
+
+int
+afr_add_inode_lockee(afr_local_t *local, int child_count);
void
-afr_entry_lockee_cleanup(afr_internal_lock_t *int_lock);
+afr_lockees_cleanup(afr_internal_lock_t *int_lock);
int
afr_attempt_lock_recovery(xlator_t *this, int32_t child_index);
@@ -995,10 +1064,7 @@ int32_t
afr_unlock(call_frame_t *frame, xlator_t *this);
int
-afr_nonblocking_entrylk(call_frame_t *frame, xlator_t *this);
-
-int
-afr_nonblocking_inodelk(call_frame_t *frame, xlator_t *this);
+afr_lock_nonblocking(call_frame_t *frame, xlator_t *this);
int
afr_blocking_lock(call_frame_t *frame, xlator_t *this);
@@ -1057,6 +1123,9 @@ afr_cleanup_fd_ctx(xlator_t *this, fd_t *fd);
if (__local && __local->is_read_txn) \
afr_pending_read_decrement(__this->private, \
__local->read_subvol); \
+ if (__local && __local->xdata_req && \
+ afr_is_lock_mode_mandatory(__local->xdata_req)) \
+ afr_dom_lock_release(frame); \
frame->local = NULL; \
} \
\
@@ -1084,8 +1153,8 @@ afr_cleanup_fd_ctx(xlator_t *this, fd_t *fd);
#define AFR_FRAME_INIT(frame, op_errno) \
({ \
frame->local = mem_get0(THIS->local_pool); \
- if (afr_local_init(frame->local, THIS->private, &op_errno)) { \
- afr_local_cleanup(frame->local, THIS); \
+ if (afr_local_init(frame->local, frame->this->private, &op_errno)) { \
+ afr_local_cleanup(frame->local, frame->this); \
mem_put(frame->local); \
frame->local = NULL; \
}; \
@@ -1209,8 +1278,8 @@ int
afr_inode_split_brain_choice_set(inode_t *inode, xlator_t *this,
int spb_choice);
int
-afr_inode_split_brain_choice_get(inode_t *inode, xlator_t *this,
- int *spb_choice);
+afr_split_brain_read_subvol_get(inode_t *inode, xlator_t *this,
+ call_frame_t *frame, int *spb_subvol);
int
afr_get_child_index_from_name(xlator_t *this, char *name);
@@ -1265,9 +1334,6 @@ afr_writev_copy_outvars(call_frame_t *src_frame, call_frame_t *dst_frame);
void
afr_update_uninodelk(afr_local_t *local, afr_internal_lock_t *int_lock,
int32_t child_index);
-int
-afr_is_inodelk_transaction(afr_transaction_type type);
-
afr_fd_ctx_t *
__afr_fd_ctx_get(fd_t *fd, xlator_t *this);
@@ -1298,7 +1364,7 @@ int
afr_set_inode_local(xlator_t *this, afr_local_t *local, inode_t *inode);
int
-afr_fill_ta_loc(xlator_t *this, loc_t *loc);
+afr_fill_ta_loc(xlator_t *this, loc_t *loc, gf_boolean_t is_gfid_based_fop);
int
afr_ta_post_op_lock(xlator_t *this, loc_t *loc);
@@ -1320,4 +1386,38 @@ afr_ta_has_quorum(afr_private_t *priv, afr_local_t *local);
void
afr_ta_lock_release_synctask(xlator_t *this);
+
+void
+afr_ta_locked_priv_invalidate(afr_private_t *priv);
+
+gf_boolean_t
+afr_lookup_has_quorum(call_frame_t *frame,
+ const unsigned int up_children_count);
+
+void
+afr_mark_new_entry_changelog(call_frame_t *frame, xlator_t *this);
+
+void
+afr_handle_replies_quorum(call_frame_t *frame, xlator_t *this);
+
+gf_boolean_t
+afr_ta_dict_contains_pending_xattr(dict_t *dict, afr_private_t *priv,
+ int child);
+
+void
+afr_selfheal_childup(xlator_t *this, afr_private_t *priv);
+
+gf_boolean_t
+afr_is_lock_mode_mandatory(dict_t *xdata);
+
+void
+afr_dom_lock_release(call_frame_t *frame);
+
+void
+afr_fill_success_replies(afr_local_t *local, afr_private_t *priv,
+ unsigned char *replies);
+
+gf_boolean_t
+afr_is_private_directory(afr_private_t *priv, uuid_t pargfid, const char *name,
+ pid_t pid);
#endif /* __AFR_H__ */
diff --git a/xlators/cluster/dht/src/Makefile.am b/xlators/cluster/dht/src/Makefile.am
index caeb17b0f07..56f1f2ad7c8 100644
--- a/xlators/cluster/dht/src/Makefile.am
+++ b/xlators/cluster/dht/src/Makefile.am
@@ -14,19 +14,13 @@ dht_la_SOURCES = $(dht_common_source) dht.c
nufa_la_SOURCES = $(dht_common_source) nufa.c
switch_la_SOURCES = $(dht_common_source) switch.c
-dht_la_LDFLAGS = -module \
- -export-symbols $(top_srcdir)/xlators/cluster/dht/src/dht.sym \
- $(GF_XLATOR_LDFLAGS)
+dht_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS)
dht_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
-nufa_la_LDFLAGS = -module \
- -export-symbols $(top_srcdir)/xlators/cluster/dht/src/nufa.sym \
- $(GF_XLATOR_LDFLAGS)
+nufa_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS)
nufa_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
-switch_la_LDFLAGS = -module \
- -export-symbols $(top_srcdir)/xlators/cluster/dht/src/switch.sym \
- $(GF_XLATOR_LDFLAGS)
+switch_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS)
switch_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
noinst_HEADERS = dht-common.h dht-mem-types.h dht-messages.h \
@@ -41,8 +35,6 @@ AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \
CLEANFILES =
-EXTRA_DIST = dht.sym nufa.sym switch.sym
-
uninstall-local:
rm -f $(DESTDIR)$(xlatordir)/distribute.so
diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c
index 24402399597..8ba0cc4c732 100644
--- a/xlators/cluster/dht/src/dht-common.c
+++ b/xlators/cluster/dht/src/dht-common.c
@@ -10,48 +10,52 @@
/* TODO: add NS locking */
-#include "glusterfs.h"
-#include "xlator.h"
#include "libxlator.h"
#include "dht-common.h"
#include "dht-lock.h"
-#include "defaults.h"
-#include "byte-order.h"
-#include "quota-common-utils.h"
-#include "upcall-utils.h"
+#include <glusterfs/byte-order.h>
+#include <glusterfs/quota-common-utils.h>
+#include <glusterfs/upcall-utils.h>
+#include "glusterfs/compat-errno.h" // for ENODATA on BSD
+#include <glusterfs/common-utils.h>
#include <sys/time.h>
#include <libgen.h>
#include <signal.h>
-int run_defrag = 0;
-
-int
-dht_link2(xlator_t *this, xlator_t *dst_node, call_frame_t *frame, int ret);
+static int
+dht_rmdir_readdirp_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, gf_dirent_t *entries,
+ dict_t *xdata);
-int
-dht_removexattr2(xlator_t *this, xlator_t *subvol, call_frame_t *frame,
- int ret);
+static int
+dht_link2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret);
-int
-dht_setxattr2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret);
+static int
+dht_set_dir_xattr_req(xlator_t *this, loc_t *loc, dict_t *xattr_req);
-int
-dht_rmdir_readdirp_do(call_frame_t *readdirp_frame, xlator_t *this);
+static int
+dht_lookup_everywhere_done(call_frame_t *frame, xlator_t *this);
-int
-dht_common_xattrop_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *dict,
- dict_t *xdata);
+static int
+dht_common_mark_mdsxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, dict_t *xdata);
-int
-dht_set_file_xattr_req(xlator_t *this, loc_t *loc, dict_t *xattr_req);
+static int
+dht_rmdir_unlock(call_frame_t *frame, xlator_t *this);
-int
-dht_set_dir_xattr_req(xlator_t *this, loc_t *loc, dict_t *xattr_req);
+static const char *dht_dbg_vxattrs[] = {DHT_DBG_HASHED_SUBVOL_PATTERN, NULL};
-int
-dht_do_fresh_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc);
+/* Check the xdata to make sure EBADF has been set by client xlator */
+int32_t
+dht_check_remote_fd_failed_error(dht_local_t *local, int op_ret, int op_errno)
+{
+ if (op_ret == -1 && (op_errno == EBADF || op_errno == EBADFD) &&
+ !(local->fd_checked)) {
+ return 1;
+ }
+ return 0;
+}
/* Sets the blocks and size values to fixed values. This is to be called
* only for dirs. The caller is responsible for checking the type
@@ -67,67 +71,17 @@ dht_set_fixed_dir_stat(struct iatt *stat)
return -1;
}
-/* Set both DHT_IATT_IN_XDATA_KEY and DHT_MODE_IN_XDATA_KEY
- * Use DHT_MODE_IN_XDATA_KEY if available. Else fall back to
- * DHT_IATT_IN_XDATA_KEY
- */
-int
-dht_request_iatt_in_xdata(xlator_t *this, dict_t *xattr_req)
-{
- int ret = -1;
-
- ret = dict_set_int8(xattr_req, DHT_MODE_IN_XDATA_KEY, 1);
- ret = dict_set_int8(xattr_req, DHT_IATT_IN_XDATA_KEY, 1);
-
- /* At least one call succeeded */
- return ret;
-}
-
-/* Get both DHT_IATT_IN_XDATA_KEY and DHT_MODE_IN_XDATA_KEY
- * Use DHT_MODE_IN_XDATA_KEY if available, else fall back to
- * DHT_IATT_IN_XDATA_KEY
- * This will return a dummy iatt with only the mode and type set
- */
-int
-dht_read_iatt_from_xdata(xlator_t *this, dict_t *xdata, struct iatt *stbuf)
-{
- int ret = -1;
- int32_t mode = 0;
-
- ret = dict_get_int32(xdata, DHT_MODE_IN_XDATA_KEY, &mode);
-
- if (ret) {
- ret = dict_get_bin(xdata, DHT_IATT_IN_XDATA_KEY, (void **)&stbuf);
- } else {
- stbuf->ia_prot = ia_prot_from_st_mode(mode);
- stbuf->ia_type = ia_type_from_st_mode(mode);
- }
-
- return ret;
-}
-
-int
-dht_rmdir_unlock(call_frame_t *frame, xlator_t *this);
-
-char *xattrs_to_heal[] = {"user.",
- POSIX_ACL_ACCESS_XATTR,
- POSIX_ACL_DEFAULT_XATTR,
- QUOTA_LIMIT_KEY,
- QUOTA_LIMIT_OBJECTS_KEY,
- GF_SELINUX_XATTR_KEY,
- NULL};
-
-char *dht_dbg_vxattrs[] = {DHT_DBG_HASHED_SUBVOL_PATTERN, NULL};
-
/* Return true if key exists in array
*/
static gf_boolean_t
dht_match_xattr(const char *key)
{
+ char **xattrs_to_heal = get_xattrs_to_heal();
+
return gf_get_index_by_elem(xattrs_to_heal, (char *)key) >= 0;
}
-int
+static int
dht_aggregate_quota_xattr(dict_t *dst, char *key, data_t *value)
{
int ret = -1;
@@ -190,7 +144,7 @@ out:
return ret;
}
-int
+static int
add_opt(char **optsp, const char *opt)
{
char *newopts = NULL;
@@ -268,7 +222,7 @@ out:
*/
-int
+static int
dht_aggregate_split_brain_xattr(dict_t *dst, char *key, data_t *value)
{
int ret = 0;
@@ -367,7 +321,7 @@ out:
return ret;
}
-int
+static int
dht_aggregate(dict_t *this, char *key, data_t *value, void *data)
{
dict_t *dst = NULL;
@@ -414,7 +368,7 @@ out:
return ret;
}
-void
+static void
dht_aggregate_xattr(dict_t *dst, dict_t *src)
{
if ((dst == NULL) || (src == NULL)) {
@@ -496,7 +450,7 @@ dht_inode_ctx_mdsvol_get(inode_t *inode, xlator_t *this, xlator_t **mdsvol)
- complete linkfile selfheal
*/
-int
+static int
dht_lookup_selfheal_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno, dict_t *xdata)
{
@@ -538,7 +492,7 @@ out:
return ret;
}
-int
+static int
dht_discover_complete(xlator_t *this, call_frame_t *discover_frame)
{
dht_local_t *local = NULL;
@@ -659,13 +613,14 @@ dht_discover_complete(xlator_t *this, call_frame_t *discover_frame)
if (local->need_xattr_heal && !heal_path) {
local->need_xattr_heal = 0;
- ret = dht_dir_xattr_heal(this, local);
- if (ret)
- gf_msg(this->name, GF_LOG_ERROR, ret,
+ ret = dht_dir_xattr_heal(this, local, &op_errno);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, op_errno,
DHT_MSG_DIR_XATTR_HEAL_FAILED,
"xattr heal failed for "
"directory gfid is %s ",
gfid_local);
+ }
}
}
@@ -726,7 +681,7 @@ out:
return ret;
}
-int
+static int
dht_common_mark_mdsxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno, dict_t *xdata)
{
@@ -735,6 +690,7 @@ dht_common_mark_mdsxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int ret = -1;
dht_conf_t *conf = 0;
dht_layout_t *layout = NULL;
+ int32_t mds_heal_fresh_lookup = 0;
GF_VALIDATE_OR_GOTO(this->name, frame, out);
GF_VALIDATE_OR_GOTO(this->name, frame->local, out);
@@ -742,6 +698,7 @@ dht_common_mark_mdsxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
local = frame->local;
conf = this->private;
layout = local->selfheal.layout;
+ mds_heal_fresh_lookup = local->mds_heal_fresh_lookup;
if (op_ret) {
gf_msg_debug(this->name, op_ret,
@@ -762,11 +719,63 @@ dht_common_mark_mdsxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
layout);
}
out:
- if (local && local->mds_heal_fresh_lookup)
+ if (mds_heal_fresh_lookup)
DHT_STACK_DESTROY(frame);
return 0;
}
+static xlator_t *
+dht_inode_get_hashed_subvol(inode_t *inode, xlator_t *this, loc_t *loc)
+{
+ char *path = NULL;
+ loc_t populate_loc = {
+ 0,
+ };
+ char *name = NULL;
+ xlator_t *hash_subvol = NULL;
+
+ if (!inode)
+ return hash_subvol;
+
+ if (loc && loc->parent && loc->path) {
+ if (!loc->name) {
+ name = strrchr(loc->path, '/');
+ if (name) {
+ loc->name = name + 1;
+ } else {
+ goto out;
+ }
+ }
+ hash_subvol = dht_subvol_get_hashed(this, loc);
+ goto out;
+ }
+
+ if (!gf_uuid_is_null(inode->gfid)) {
+ populate_loc.inode = inode_ref(inode);
+ populate_loc.parent = inode_parent(populate_loc.inode, NULL, NULL);
+ inode_path(populate_loc.inode, NULL, &path);
+
+ if (!path)
+ goto out;
+
+ populate_loc.path = path;
+ if (!populate_loc.name && populate_loc.path) {
+ name = strrchr(populate_loc.path, '/');
+ if (name) {
+ populate_loc.name = name + 1;
+
+ } else {
+ goto out;
+ }
+ }
+ hash_subvol = dht_subvol_get_hashed(this, &populate_loc);
+ }
+out:
+ if (populate_loc.inode)
+ loc_wipe(&populate_loc);
+ return hash_subvol;
+}
+
/* Common function called by the revalidate/selfheal code paths to populate
the internal xattr if it is not present; the mark_during_fresh_lookup value
determines whether the function is called by revalidate_cbk(discover_complete)
@@ -801,9 +810,8 @@ dht_common_mark_mdsxattr(call_frame_t *frame, int *errst,
call_frame_t *xattr_frame = NULL;
gf_boolean_t vol_down = _gf_false;
- this = frame->this;
-
GF_VALIDATE_OR_GOTO("dht", frame, out);
+ this = frame->this;
GF_VALIDATE_OR_GOTO("dht", this, out);
GF_VALIDATE_OR_GOTO(this->name, frame->local, out);
GF_VALIDATE_OR_GOTO(this->name, this->private, out);
@@ -812,6 +820,7 @@ dht_common_mark_mdsxattr(call_frame_t *frame, int *errst,
conf = this->private;
layout = local->selfheal.layout;
local->mds_heal_fresh_lookup = mark_during_fresh_lookup;
+
gf_uuid_unparse(local->gfid, gfid_local);
/* Code to update hashed subvol consider as a mds subvol
@@ -852,7 +861,8 @@ dht_common_mark_mdsxattr(call_frame_t *frame, int *errst,
"Failed to get hashed subvol for path %s"
"gfid is %s ",
local->loc.path, gfid_local);
- (*errst) = 1;
+ if (errst)
+ (*errst) = 1;
ret = -1;
goto out;
}
@@ -923,7 +933,44 @@ out:
return ret;
}
-int
+/* Read the raw bytes stored under key in dict and copy them into value[],
+ converting each 32-bit element from network byte order to host byte order
+*/
+static int32_t
+dht_dict_get_array(dict_t *dict, char *key, int32_t value[], int32_t size,
+ int *errst)
+{
+ void *ptr = NULL;
+ int32_t len = -1;
+ int32_t vindex = -1;
+ int32_t err = -1;
+ int ret = 0;
+
+ if (dict == NULL) {
+ (*errst) = -1;
+ return -EINVAL;
+ }
+ err = dict_get_ptr_and_len(dict, key, &ptr, &len);
+ if (err != 0) {
+ (*errst) = -1;
+ return err;
+ }
+
+ if (len != (size * sizeof(int32_t))) {
+ (*errst) = -1;
+ return -EINVAL;
+ }
+
+ for (vindex = 0; vindex < size; vindex++) {
+ value[vindex] = ntoh32(*((int32_t *)ptr + vindex));
+ if (value[vindex] < 0)
+ ret = -1;
+ }
+
+ return ret;
+}
+
+static int
dht_discover_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
int op_errno, inode_t *inode, struct iatt *stbuf,
dict_t *xattr, struct iatt *postparent)
@@ -1018,7 +1065,7 @@ dht_discover_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
if (local->xattr == NULL) {
local->xattr = dict_ref(xattr);
} else {
- /* Don't aggregate for files. See BZ#1484113 */
+ /* Don't aggregate for files. See BZ#1484709 */
if (is_dir)
dht_aggregate_xattr(local->xattr, xattr);
}
@@ -1084,7 +1131,53 @@ out:
return 0;
}
-int
+static int
+dht_set_file_xattr_req(xlator_t *this, loc_t *loc, dict_t *xattr_req)
+{
+ int ret = -EINVAL;
+ dht_conf_t *conf = NULL;
+
+ conf = this->private;
+ if (!conf) {
+ goto err;
+ }
+
+ if (!xattr_req) {
+ goto err;
+ }
+
+ /* Used to check whether this is a linkto file.
+ */
+ ret = dict_set_uint32(xattr_req, conf->link_xattr_name, 256);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_WARNING, ENOMEM, DHT_MSG_DICT_SET_FAILED,
+ "Failed to set dictionary value:key = %s for "
+ "path %s",
+ conf->link_xattr_name, loc->path);
+ goto err;
+ }
+
+ /* This is used to make sure we don't unlink linkto files
+ * which are the target of an ongoing file migration.
+ */
+ ret = dict_set_uint32(xattr_req, GLUSTERFS_OPEN_FD_COUNT, 4);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, ENOMEM, DHT_MSG_DICT_SET_FAILED,
+ "Failed to set dictionary value:key = %s for "
+ "path %s",
+ GLUSTERFS_OPEN_FD_COUNT, loc->path);
+ goto err;
+ }
+
+ ret = 0;
+err:
+ return ret;
+}
+
+/* This is a gfid based nameless lookup. Without a name, the hashed subvol
+ * cannot be calculated so a lookup is sent to all subvols.
+ */
+static int
dht_do_discover(call_frame_t *frame, xlator_t *this, loc_t *loc)
{
int ret;
@@ -1098,6 +1191,9 @@ dht_do_discover(call_frame_t *frame, xlator_t *this, loc_t *loc)
conf = this->private;
local = frame->local;
+ /* As we do not know if this is a file or directory, request
+ * both file and directory xattrs
+ */
ret = dht_set_file_xattr_req(this, loc, local->xattr_req);
if (ret) {
goto err;
@@ -1109,6 +1205,9 @@ dht_do_discover(call_frame_t *frame, xlator_t *this, loc_t *loc)
}
if (loc_is_root(loc)) {
+ /* Request the DHT commit hash xattr (trusted.glusterfs.dht.commithash)
+ * set on the brick root.
+ */
ret = dict_set_uint32(local->xattr_req, conf->commithash_xattr_name,
sizeof(uint32_t));
}
@@ -1150,48 +1249,11 @@ err:
return 0;
}
-/* Get the value of key from dict in the bytewise and save in array after
- convert from network byte order to host byte order
-*/
-int32_t
-dht_dict_get_array(dict_t *dict, char *key, int32_t value[], int32_t size,
- int *errst)
-{
- void *ptr = NULL;
- int32_t len = -1;
- int32_t vindex = -1;
- int32_t err = -1;
- int ret = 0;
-
- if (dict == NULL) {
- (*errst) = -1;
- return -EINVAL;
- }
- err = dict_get_ptr_and_len(dict, key, &ptr, &len);
- if (err != 0) {
- (*errst) = -1;
- return err;
- }
-
- if (len != (size * sizeof(int32_t))) {
- (*errst) = -1;
- return -EINVAL;
- }
-
- for (vindex = 0; vindex < size; vindex++) {
- value[vindex] = ntoh32(*((int32_t *)ptr + vindex));
- if (value[vindex] < 0)
- ret = -1;
- }
-
- return ret;
-}
-
/* Code to call a synctask to heal custom xattrs from the hashed subvol
to the non-hashed subvol
*/
int
-dht_dir_xattr_heal(xlator_t *this, dht_local_t *local)
+dht_dir_xattr_heal(xlator_t *this, dht_local_t *local, int *op_errno)
{
dht_local_t *copy_local = NULL;
call_frame_t *copy = NULL;
@@ -1203,6 +1265,7 @@ dht_dir_xattr_heal(xlator_t *this, dht_local_t *local)
"No gfid exists for path %s "
"so healing xattr is not possible",
local->loc.path);
+ *op_errno = EIO;
goto out;
}
@@ -1216,6 +1279,7 @@ dht_dir_xattr_heal(xlator_t *this, dht_local_t *local)
"Memory allocation failed "
"for path %s gfid %s ",
local->loc.path, gfid_local);
+ *op_errno = ENOMEM;
DHT_STACK_DESTROY(copy);
} else {
copy_local->stbuf = local->stbuf;
@@ -1230,6 +1294,7 @@ dht_dir_xattr_heal(xlator_t *this, dht_local_t *local)
"Synctask creation failed to heal xattr "
"for path %s gfid %s ",
local->loc.path, gfid_local);
+ *op_errno = ENOMEM;
DHT_STACK_DESTROY(copy);
}
}
@@ -1238,6 +1303,51 @@ out:
return ret;
}
+static int
+dht_needs_selfheal(call_frame_t *frame, xlator_t *this)
+{
+ dht_local_t *local = NULL;
+ dht_layout_t *layout = NULL;
+ int needs_selfheal = 0;
+ int ret = 0;
+
+ local = frame->local;
+ layout = local->layout;
+
+ if (local->need_attrheal || local->need_xattr_heal ||
+ local->need_selfheal) {
+ needs_selfheal = 1;
+ }
+
+ ret = dht_layout_normalize(this, &local->loc, layout);
+
+ if (ret != 0) {
+ gf_msg_debug(this->name, 0, "fixing assignment on %s", local->loc.path);
+ needs_selfheal = 1;
+ }
+ return needs_selfheal;
+}
+
+static int
+is_permission_different(ia_prot_t *prot1, ia_prot_t *prot2)
+{
+ if ((prot1->owner.read != prot2->owner.read) ||
+ (prot1->owner.write != prot2->owner.write) ||
+ (prot1->owner.exec != prot2->owner.exec) ||
+ (prot1->group.read != prot2->group.read) ||
+ (prot1->group.write != prot2->group.write) ||
+ (prot1->group.exec != prot2->group.exec) ||
+ (prot1->other.read != prot2->other.read) ||
+ (prot1->other.write != prot2->other.write) ||
+ (prot1->other.exec != prot2->other.exec) ||
+ (prot1->suid != prot2->suid) || (prot1->sgid != prot2->sgid) ||
+ (prot1->sticky != prot2->sticky)) {
+ return 1;
+ } else {
+ return 0;
+ }
+}
+
int
dht_lookup_dir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno, inode_t *inode, struct iatt *stbuf,
@@ -1255,8 +1365,6 @@ dht_lookup_dir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
char gfid_local[GF_UUID_BUF_SIZE] = {0};
char gfid_node[GF_UUID_BUF_SIZE] = {0};
int32_t mds_xattr_val[1] = {0};
- call_frame_t *copy = NULL;
- dht_local_t *copy_local = NULL;
GF_VALIDATE_OR_GOTO("dht", frame, out);
GF_VALIDATE_OR_GOTO("dht", this, out);
@@ -1269,7 +1377,11 @@ dht_lookup_dir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
conf = this->private;
layout = local->layout;
+ gf_msg_debug(this->name, op_errno,
+ "%s: lookup on %s returned with op_ret = %d, op_errno = %d",
+ local->loc.path, prev->name, op_ret, op_errno);
+ /* First successful lookup: record the gfid it returned */
if (!op_ret && gf_uuid_is_null(local->gfid)) {
memcpy(local->gfid, stbuf->ia_gfid, 16);
}
@@ -1291,29 +1403,28 @@ dht_lookup_dir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
{
/* TODO: assert equal mode on stbuf->st_mode and
local->stbuf->st_mode
-
else mkdir/chmod/chown and fix
*/
ret = dht_layout_merge(this, layout, prev, op_ret, op_errno, xattr);
if (op_ret == -1) {
local->op_errno = op_errno;
- gf_msg_debug(this->name, op_errno,
- "lookup of %s on %s returned error", local->loc.path,
- prev->name);
+ /* The GFID is missing on this subvol. Force a heal. */
+ if (op_errno == ENODATA) {
+ local->need_lookup_everywhere = 1;
+ }
goto unlock;
}
is_dir = check_is_dir(inode, stbuf, xattr);
if (!is_dir) {
gf_msg_debug(this->name, 0,
- "lookup of %s on %s returned non"
- "dir 0%o"
+ "%s: lookup on %s returned non dir 0%o"
"calling lookup_everywhere",
local->loc.path, prev->name, stbuf->ia_type);
- local->need_selfheal = 1;
+ local->need_lookup_everywhere = 1;
goto unlock;
}
@@ -1324,19 +1435,31 @@ dht_lookup_dir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
dht_aggregate_xattr(local->xattr, xattr);
}
- if (dict_get(xattr, conf->mds_xattr_key)) {
- local->mds_subvol = prev;
- local->mds_stbuf.ia_gid = stbuf->ia_gid;
- local->mds_stbuf.ia_uid = stbuf->ia_uid;
- local->mds_stbuf.ia_prot = stbuf->ia_prot;
+ if (__is_root_gfid(stbuf->ia_gfid)) {
+ ret = dht_dir_has_layout(xattr, conf->xattr_name);
+ if (ret >= 0) {
+ if (is_greater_time(local->prebuf.ia_ctime,
+ local->prebuf.ia_ctime_nsec,
+ stbuf->ia_ctime, stbuf->ia_ctime_nsec)) {
+ /* Choose source */
+ local->prebuf.ia_gid = stbuf->ia_gid;
+ local->prebuf.ia_uid = stbuf->ia_uid;
+
+ local->prebuf.ia_ctime = stbuf->ia_ctime;
+ local->prebuf.ia_ctime_nsec = stbuf->ia_ctime_nsec;
+ local->prebuf.ia_prot = stbuf->ia_prot;
+ }
+ }
}
if (local->stbuf.ia_type != IA_INVAL) {
- if (!__is_root_gfid(stbuf->ia_gfid) &&
- ((local->stbuf.ia_gid != stbuf->ia_gid) ||
- (local->stbuf.ia_uid != stbuf->ia_uid) ||
- (is_permission_different(&local->stbuf.ia_prot,
- &stbuf->ia_prot)))) {
+ /* This is not the first subvol to respond
+ * Compare values to see if attrs need to be healed
+ */
+ if ((local->stbuf.ia_gid != stbuf->ia_gid) ||
+ (local->stbuf.ia_uid != stbuf->ia_uid) ||
+ (is_permission_different(&local->stbuf.ia_prot,
+ &stbuf->ia_prot))) {
local->need_attrheal = 1;
}
}
@@ -1349,125 +1472,99 @@ dht_lookup_dir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
if (!dict_get(xattr, conf->mds_xattr_key)) {
gf_msg_debug(this->name, 0,
- "Internal xattr %s is not present "
- " on path %s gfid is %s ",
- conf->mds_xattr_key, local->loc.path, gfid_local);
+ "%s: mds xattr %s is not present "
+ "on %s(gfid = %s)",
+ local->loc.path, conf->mds_xattr_key, prev->name,
+ gfid_local);
goto unlock;
- } else {
- /* Save mds subvol on inode ctx */
- ret = dht_inode_ctx_mdsvol_set(local->inode, this, prev);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0,
- DHT_MSG_SET_INODE_CTX_FAILED,
- "Failed to set hashed subvol for %s vol is %s",
- local->loc.path, prev->name);
- }
+ }
+
+ /* Save the mds subvol info and stbuf. This is the value that will
+ * be used for healing
+ */
+ local->mds_subvol = prev;
+ local->mds_stbuf = *stbuf;
+
+ /* Save mds subvol on inode ctx */
+
+ ret = dht_inode_ctx_mdsvol_set(local->inode, this, prev);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_SET_INODE_CTX_FAILED,
+ "%s: Failed to set mds (%s)", local->loc.path, prev->name);
}
check_mds = dht_dict_get_array(xattr, conf->mds_xattr_key,
mds_xattr_val, 1, &errst);
if ((check_mds < 0) && !errst) {
+ /* Check if xattrs need to be healed on the directories */
local->mds_xattr = dict_ref(xattr);
gf_msg_debug(this->name, 0,
- "Value of %s is not zero on hashed subvol "
- "so xattr needs to be heal on non hashed"
- " path is %s and vol name is %s "
- " gfid is %s",
- conf->mds_xattr_key, local->loc.path, prev->name,
+ "%s: %s is not zero on %s. Xattrs need to be healed."
+ "(gfid = %s)",
+ local->loc.path, conf->mds_xattr_key, prev->name,
gfid_local);
local->need_xattr_heal = 1;
- local->mds_subvol = prev;
}
}
+
unlock:
UNLOCK(&frame->lock);
this_call_cnt = dht_frame_return(frame);
if (is_last_call(this_call_cnt)) {
+ /* No mds xattr was found for a non-root gfid: request a selfheal */
+ if (!__is_root_gfid(local->gfid) &&
+ (!dict_get(local->xattr, conf->mds_xattr_key))) {
+ local->need_selfheal = 1;
+ }
+
/* No need to call xattr heal code if volume count is 1
*/
- if (conf->subvolume_cnt == 1)
+ if (conf->subvolume_cnt == 1) {
local->need_xattr_heal = 0;
+ }
- /* Code to update all extended attributed from hashed subvol
- to local->xattr
- */
- if (local->need_xattr_heal && (local->mds_xattr)) {
- dht_dir_set_heal_xattr(this, local, local->xattr, local->mds_xattr,
- NULL, NULL);
- dict_unref(local->mds_xattr);
- local->mds_xattr = NULL;
+ if (local->need_selfheal || local->need_lookup_everywhere) {
+ /* Set the gfid-req so posix will set the GFID*/
+ if (!gf_uuid_is_null(local->gfid)) {
+ /* Ok, this should _never_ happen */
+ ret = dict_set_static_bin(local->xattr_req, "gfid-req",
+ local->gfid, 16);
+ } else {
+ if (!gf_uuid_is_null(local->gfid_req))
+ ret = dict_set_static_bin(local->xattr_req, "gfid-req",
+ local->gfid_req, 16);
+ }
}
- if (local->need_selfheal) {
- local->need_selfheal = 0;
+ if (local->need_lookup_everywhere) {
+ local->need_lookup_everywhere = 0;
dht_lookup_everywhere(frame, this, &local->loc);
return 0;
}
if (local->op_ret == 0) {
- ret = dht_layout_normalize(this, &local->loc, layout);
-
- if (ret != 0) {
- gf_msg_debug(this->name, 0, "fixing assignment on %s",
- local->loc.path);
+ if (dht_needs_selfheal(frame, this)) {
goto selfheal;
}
dht_layout_set(this, local->inode, layout);
- if (!dict_get(local->xattr, conf->mds_xattr_key) ||
- local->need_xattr_heal)
- goto selfheal;
- }
-
- if (local->inode) {
- dht_inode_ctx_time_update(local->inode, this, &local->stbuf, 1);
- }
-
- if (local->loc.parent) {
- dht_inode_ctx_time_update(local->loc.parent, this,
- &local->postparent, 1);
- }
-
- if (local->need_attrheal) {
- local->need_attrheal = 0;
- if (!__is_root_gfid(inode->gfid)) {
- local->stbuf.ia_gid = local->mds_stbuf.ia_gid;
- local->stbuf.ia_uid = local->mds_stbuf.ia_uid;
- local->stbuf.ia_prot = local->mds_stbuf.ia_prot;
+ if (local->inode) {
+ dht_inode_ctx_time_update(local->inode, this, &local->stbuf, 1);
}
- copy = create_frame(this, this->ctx->pool);
- if (copy) {
- copy_local = dht_local_init(copy, &local->loc, NULL, 0);
- if (!copy_local) {
- DHT_STACK_DESTROY(copy);
- goto skip_attr_heal;
- }
- copy_local->stbuf = local->stbuf;
- gf_uuid_copy(copy_local->loc.gfid, local->stbuf.ia_gfid);
- copy_local->mds_stbuf = local->mds_stbuf;
- copy_local->mds_subvol = local->mds_subvol;
- copy->local = copy_local;
- FRAME_SU_DO(copy, dht_local_t);
- ret = synctask_new(this->ctx->env, dht_dir_attr_heal,
- dht_dir_attr_heal_done, copy, copy);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, ENOMEM,
- DHT_MSG_DIR_ATTR_HEAL_FAILED,
- "Synctask creation failed to heal attr "
- "for path %s gfid %s ",
- local->loc.path, local->gfid);
- DHT_STACK_DESTROY(copy);
- }
+
+ if (local->loc.parent) {
+ dht_inode_ctx_time_update(local->loc.parent, this,
+ &local->postparent, 1);
}
}
- skip_attr_heal:
DHT_STRIP_PHASE1_FLAGS(&local->stbuf);
dht_set_fixed_dir_stat(&local->postparent);
/* Delete mds xattr at the time of STACK UNWIND */
if (local->xattr)
GF_REMOVE_INTERNAL_XATTR(conf->mds_xattr_key, local->xattr);
+
DHT_STACK_UNWIND(lookup, frame, local->op_ret, local->op_errno,
local->inode, &local->stbuf, local->xattr,
&local->postparent);
@@ -1483,24 +1580,57 @@ out:
return ret;
}
-int
-is_permission_different(ia_prot_t *prot1, ia_prot_t *prot2)
+static int
+dht_lookup_directory(call_frame_t *frame, xlator_t *this, loc_t *loc)
{
- if ((prot1->owner.read != prot2->owner.read) ||
- (prot1->owner.write != prot2->owner.write) ||
- (prot1->owner.exec != prot2->owner.exec) ||
- (prot1->group.read != prot2->group.read) ||
- (prot1->group.write != prot2->group.write) ||
- (prot1->group.exec != prot2->group.exec) ||
- (prot1->other.read != prot2->other.read) ||
- (prot1->other.write != prot2->other.write) ||
- (prot1->other.exec != prot2->other.exec) ||
- (prot1->suid != prot2->suid) || (prot1->sgid != prot2->sgid) ||
- (prot1->sticky != prot2->sticky)) {
- return 1;
- } else {
- return 0;
+ int call_cnt = 0;
+ int i = 0;
+ dht_conf_t *conf = NULL;
+ dht_local_t *local = NULL;
+ int ret = 0;
+
+ GF_VALIDATE_OR_GOTO("dht", frame, out);
+ GF_VALIDATE_OR_GOTO("dht", this, unwind);
+ GF_VALIDATE_OR_GOTO("dht", frame->local, unwind);
+ GF_VALIDATE_OR_GOTO("dht", this->private, unwind);
+ GF_VALIDATE_OR_GOTO("dht", loc, unwind);
+
+ conf = this->private;
+ local = frame->local;
+
+ call_cnt = conf->subvolume_cnt;
+ local->call_cnt = call_cnt;
+
+ local->layout = dht_layout_new(this, conf->subvolume_cnt);
+ if (!local->layout) {
+ goto unwind;
}
+
+ if (local->xattr != NULL) {
+ dict_unref(local->xattr);
+ local->xattr = NULL;
+ }
+
+ if (!gf_uuid_is_null(local->gfid)) {
+ /* use this gfid in order to heal any missing ones */
+ ret = dict_set_gfuuid(local->xattr_req, "gfid-req", local->gfid, true);
+ if (ret)
+ gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DICT_SET_FAILED,
+ "%s: Failed to set dictionary value:"
+ " key = gfid-req",
+ local->loc.path);
+ }
+
+ for (i = 0; i < call_cnt; i++) {
+ STACK_WIND_COOKIE(
+ frame, dht_lookup_dir_cbk, conf->subvolumes[i], conf->subvolumes[i],
+ conf->subvolumes[i]->fops->lookup, &local->loc, local->xattr_req);
+ }
+ return 0;
+unwind:
+ DHT_STACK_UNWIND(lookup, frame, -1, ENOMEM, NULL, NULL, NULL, NULL);
+out:
+ return 0;
}
int
@@ -1517,13 +1647,11 @@ dht_revalidate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int is_dir = 0;
int is_linkfile = 0;
int follow_link = 0;
- call_frame_t *copy = NULL;
- dht_local_t *copy_local = NULL;
char gfid[GF_UUID_BUF_SIZE] = {0};
uint32_t vol_commit_hash = 0;
xlator_t *subvol = NULL;
int32_t check_mds = 0;
- int errst = 0;
+ int errst = 0, i = 0;
int32_t mds_xattr_val[1] = {0};
GF_VALIDATE_OR_GOTO("dht", frame, err);
@@ -1537,6 +1665,8 @@ dht_revalidate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
conf = this->private;
if (!conf->vch_forced) {
+ /* Update the commithash value if available
+ */
ret = dict_get_uint32(xattr, conf->commithash_xattr_name,
&vol_commit_hash);
if (ret == 0) {
@@ -1546,17 +1676,16 @@ dht_revalidate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
gf_uuid_unparse(local->loc.gfid, gfid);
+ gf_msg_debug(this->name, op_errno,
+ "%s: revalidate lookup on %s returned op_ret %d",
+ local->loc.path, prev->name, op_ret);
+
LOCK(&frame->lock);
{
if (gf_uuid_is_null(local->gfid)) {
memcpy(local->gfid, local->loc.gfid, 16);
}
- gf_msg_debug(this->name, op_errno,
- "revalidate lookup of %s "
- "returned with op_ret %d",
- local->loc.path, op_ret);
-
if (op_ret == -1) {
local->op_errno = op_errno;
@@ -1588,8 +1717,27 @@ dht_revalidate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
local->loc.path);
local->need_lookup_everywhere = 1;
+ } else if (IA_ISDIR(local->loc.inode->ia_type)) {
+ layout = local->layout;
+ for (i = 0; i < layout->cnt; i++) {
+ if (layout->list[i].xlator == prev) {
+ layout->list[i].err = op_errno;
+ break;
+ }
+ }
+
+ local->need_selfheal = 1;
}
}
+
+ /* The GFID is missing on this subvol. Lookup everywhere to force a
+ * gfid heal
+ */
+ if ((op_errno == ENODATA) &&
+ (IA_ISDIR(local->loc.inode->ia_type))) {
+ local->need_lookup_everywhere = 1;
+ }
+
goto unlock;
}
@@ -1639,15 +1787,16 @@ dht_revalidate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
(local->stbuf.ia_uid != stbuf->ia_uid) ||
is_permission_different(&local->stbuf.ia_prot,
&stbuf->ia_prot)) {
- local->need_selfheal = 1;
+ local->need_attrheal = 1;
}
}
if (!dict_get(xattr, conf->mds_xattr_key)) {
gf_msg_debug(this->name, 0,
- "internal xattr %s is not present"
- " on path %s gfid is %s ",
- conf->mds_xattr_key, local->loc.path, gfid);
+ "%s: internal xattr %s is not present"
+ " on subvol %s(gfid is %s)",
+ local->loc.path, conf->mds_xattr_key, prev->name,
+ gfid);
} else {
check_mds = dht_dict_get_array(xattr, conf->mds_xattr_key,
mds_xattr_val, 1, &errst);
@@ -1665,6 +1814,8 @@ dht_revalidate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
local->loc.path, prev->name);
}
if ((check_mds < 0) && !errst) {
+ /* Check if xattrs need to be healed on the directory
+ */
local->mds_xattr = dict_ref(xattr);
gf_msg_debug(this->name, 0,
"Value of %s is not zero on "
@@ -1680,6 +1831,8 @@ dht_revalidate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
ret = dht_layout_dir_mismatch(this, layout, prev, &local->loc,
xattr);
if (ret != 0) {
+ /* In memory layout does not match on-disk layout.
+ */
gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LAYOUT_MISMATCH,
"Mismatching layouts for %s, gfid = %s", local->loc.path,
gfid);
@@ -1706,6 +1859,8 @@ unlock:
UNLOCK(&frame->lock);
if (follow_link) {
+ /* Found a linkto file. Follow it to see if the target file exists
+ */
gf_uuid_copy(local->gfid, stbuf->ia_gfid);
subvol = dht_linkfile_subvol(this, inode, stbuf, xattr);
@@ -1735,71 +1890,31 @@ unlock:
local->need_xattr_heal = 0;
if (IA_ISDIR(local->stbuf.ia_type)) {
- /* Code to update all extended attributed from hashed
- subvol to local->xattr and call heal code to heal
- custom xattr from hashed subvol to non-hashed subvol
- */
- if (local->need_xattr_heal && (local->mds_xattr)) {
- dht_dir_set_heal_xattr(this, local, local->xattr,
- local->mds_xattr, NULL, NULL);
- dict_unref(local->mds_xattr);
- local->mds_xattr = NULL;
- local->need_xattr_heal = 0;
- ret = dht_dir_xattr_heal(this, local);
- if (ret)
- gf_msg(this->name, GF_LOG_ERROR, ret,
- DHT_MSG_DIR_XATTR_HEAL_FAILED,
- "xattr heal failed for directory %s "
- " gfid %s ",
- local->loc.path, gfid);
- } else {
- /* Call function to save hashed subvol on inode
- ctx if internal mds xattr is not present and
- all subvols are up
- */
- if (inode && !__is_root_gfid(inode->gfid) && (!local->op_ret))
- (void)dht_common_mark_mdsxattr(frame, NULL, 1);
- }
- }
- if (local->need_selfheal) {
- local->need_selfheal = 0;
- if (!__is_root_gfid(inode->gfid)) {
- gf_uuid_copy(local->gfid, local->mds_stbuf.ia_gfid);
- local->stbuf.ia_gid = local->mds_stbuf.ia_gid;
- local->stbuf.ia_uid = local->mds_stbuf.ia_uid;
- local->stbuf.ia_prot = local->mds_stbuf.ia_prot;
- } else {
- gf_uuid_copy(local->gfid, local->stbuf.ia_gfid);
- local->stbuf.ia_gid = local->prebuf.ia_gid;
- local->stbuf.ia_uid = local->prebuf.ia_uid;
- local->stbuf.ia_prot = local->prebuf.ia_prot;
- }
-
- copy = create_frame(this, this->ctx->pool);
- if (copy) {
- copy_local = dht_local_init(copy, &local->loc, NULL, 0);
- if (!copy_local) {
- DHT_STACK_DESTROY(copy);
- goto cont;
- }
- copy_local->stbuf = local->stbuf;
- copy_local->mds_stbuf = local->mds_stbuf;
- copy_local->mds_subvol = local->mds_subvol;
- copy->local = copy_local;
- FRAME_SU_DO(copy, dht_local_t);
- ret = synctask_new(this->ctx->env, dht_dir_attr_heal,
- dht_dir_attr_heal_done, copy, copy);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, ENOMEM,
- DHT_MSG_DIR_ATTR_HEAL_FAILED,
- "Synctask creation failed to heal attr "
- "for path %s gfid %s ",
- local->loc.path, local->gfid);
- DHT_STACK_DESTROY(copy);
+ /* No mds xattr found. Trigger a heal to set it */
+ if (!__is_root_gfid(local->loc.inode->gfid) &&
+ (!dict_get(local->xattr, conf->mds_xattr_key)))
+ local->need_selfheal = 1;
+
+ if (dht_needs_selfheal(frame, this)) {
+ if (!__is_root_gfid(local->loc.inode->gfid)) {
+ if (local->mds_subvol) {
+ local->stbuf.ia_gid = local->mds_stbuf.ia_gid;
+ local->stbuf.ia_uid = local->mds_stbuf.ia_uid;
+ local->stbuf.ia_prot = local->mds_stbuf.ia_prot;
+ }
+ } else {
+ local->stbuf.ia_gid = local->prebuf.ia_gid;
+ local->stbuf.ia_uid = local->prebuf.ia_uid;
+ local->stbuf.ia_prot = local->prebuf.ia_prot;
}
+
+ layout = local->layout;
+ dht_selfheal_directory(frame, dht_lookup_selfheal_cbk,
+ &local->loc, layout);
+ return 0;
}
}
- cont:
+
if (local->layout_mismatch) {
/* Found layout mismatch in the directory, need to
fix this in the inode context */
@@ -1815,9 +1930,16 @@ unlock:
dht_layout_unref(this, local->layout);
local->layout = NULL;
- /* We know that current cached subvol is no more
+ /* We know that current cached subvol is no longer
valid, get the new one */
local->cached_subvol = NULL;
+ if (local->xattr_req) {
+ if (!gf_uuid_is_null(local->gfid)) {
+ ret = dict_set_static_bin(local->xattr_req, "gfid-req",
+ local->gfid, 16);
+ }
+ }
+
dht_lookup_everywhere(frame, this, &local->loc);
return 0;
}
@@ -1858,12 +1980,11 @@ err:
return ret;
}
-int
-dht_lookup_linkfile_create_cbk(call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- inode_t *inode, struct iatt *stbuf,
- struct iatt *preparent, struct iatt *postparent,
- dict_t *xdata)
+static int
+dht_lookup_linkfile_create_cbk(call_frame_t *frame, void *cooie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *stbuf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
{
dht_local_t *local = NULL;
xlator_t *cached_subvol = NULL;
@@ -1925,7 +2046,7 @@ out:
return ret;
}
-int
+static int
dht_lookup_unlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno, struct iatt *preparent,
struct iatt *postparent, dict_t *xdata)
@@ -1951,7 +2072,7 @@ dht_lookup_unlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
return 0;
}
-int
+static int
dht_lookup_unlink_of_false_linkto_cbk(call_frame_t *frame, void *cookie,
xlator_t *this, int op_ret, int op_errno,
struct iatt *preparent,
@@ -2003,7 +2124,7 @@ dht_lookup_unlink_of_false_linkto_cbk(call_frame_t *frame, void *cookie,
return 0;
}
-int
+static int
dht_lookup_unlink_stale_linkto_cbk(call_frame_t *frame, void *cookie,
xlator_t *this, int op_ret, int op_errno,
struct iatt *preparent,
@@ -2020,52 +2141,41 @@ dht_lookup_unlink_stale_linkto_cbk(call_frame_t *frame, void *cookie,
local = frame->local;
- if (local && local->loc.path)
- path = local->loc.path;
+ if (local) {
+ FRAME_SU_UNDO(frame, dht_local_t);
+ if (local->loc.path)
+ path = local->loc.path;
+ }
gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_UNLINK_LOOKUP_INFO,
"Returned with op_ret %d and "
"op_errno %d for %s",
op_ret, op_errno, ((path == NULL) ? "null" : path));
- FRAME_SU_UNDO(frame, dht_local_t);
DHT_STACK_UNWIND(lookup, frame, -1, ENOENT, NULL, NULL, NULL, NULL);
return 0;
}
-int
+static int
dht_fill_dict_to_avoid_unlink_of_migrating_file(dict_t *dict)
{
int ret = 0;
- xlator_t *this = NULL;
- char *linktoskip_key = NULL;
- this = THIS;
- GF_VALIDATE_OR_GOTO("dht", this, err);
-
- if (dht_is_tier_xlator(this))
- linktoskip_key = TIER_SKIP_NON_LINKTO_UNLINK;
- else
- linktoskip_key = DHT_SKIP_NON_LINKTO_UNLINK;
-
- ret = dict_set_int32(dict, linktoskip_key, 1);
+ ret = dict_set_int32_sizen(dict, DHT_SKIP_NON_LINKTO_UNLINK, 1);
if (ret)
- goto err;
+ return -1;
- ret = dict_set_int32(dict, DHT_SKIP_OPEN_FD_UNLINK, 1);
+ ret = dict_set_int32_sizen(dict, DHT_SKIP_OPEN_FD_UNLINK, 1);
if (ret)
- goto err;
+ return -1;
return 0;
-
-err:
- return -1;
}
-int32_t
+static int32_t
dht_linkfile_create_lookup_cbk(call_frame_t *frame, void *cookie,
xlator_t *this, int32_t op_ret, int32_t op_errno,
inode_t *inode, struct iatt *buf, dict_t *xdata,
@@ -2113,7 +2223,7 @@ dht_linkfile_create_lookup_cbk(call_frame_t *frame, void *cookie,
"Creating linkto file on %s(hash) to "
"%s on %s (gfid = %s)",
local->hashed_subvol->name, local->loc.path,
- local->cached_subvol->name, gfid);
+ local->cached_subvol->name, gfid_str);
ret = dht_linkfile_create(frame, dht_lookup_linkfile_create_cbk,
this, local->cached_subvol,
@@ -2139,7 +2249,7 @@ no_linkto:
return 0;
}
-int32_t
+static int32_t
dht_call_lookup_linkfile_create(call_frame_t *frame, void *cookie,
xlator_t *this, int32_t op_ret,
int32_t op_errno, dict_t *xdata)
@@ -2225,7 +2335,7 @@ err:
* dht_lookup_everywhere_done takes decision based on any of the above case
*/
-int
+static int
dht_lookup_everywhere_done(call_frame_t *frame, xlator_t *this)
{
int ret = 0;
@@ -2251,6 +2361,16 @@ dht_lookup_everywhere_done(call_frame_t *frame, xlator_t *this)
DHT_STACK_UNWIND(lookup, frame, -1, EIO, NULL, NULL, NULL, NULL);
return 0;
}
+ if (local->op_ret && local->gfid_missing) {
+ if (gf_uuid_is_null(local->gfid_req)) {
+ DHT_STACK_UNWIND(lookup, frame, -1, ENODATA, NULL, NULL, NULL,
+ NULL);
+ return 0;
+ }
+ /* A hack */
+ dht_lookup_directory(frame, this, &local->loc);
+ return 0;
+ }
if (local->dir_count) {
dht_lookup_directory(frame, this, &local->loc);
@@ -2529,7 +2649,7 @@ unwind_hashed_and_cached:
return 0;
}
-int
+static int
dht_lookup_everywhere_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, inode_t *inode,
struct iatt *buf, dict_t *xattr,
@@ -2570,6 +2690,8 @@ dht_lookup_everywhere_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
if (op_ret == -1) {
if (op_errno != ENOENT)
local->op_errno = op_errno;
+ if (op_errno == ENODATA)
+ local->gfid_missing = _gf_true;
goto unlock;
}
@@ -2863,113 +2985,54 @@ out:
return 0;
}
-int
-dht_lookup_directory(call_frame_t *frame, xlator_t *this, loc_t *loc)
-{
- int call_cnt = 0;
- int i = 0;
- dht_conf_t *conf = NULL;
- dht_local_t *local = NULL;
- int ret = 0;
-
- GF_VALIDATE_OR_GOTO("dht", frame, out);
- GF_VALIDATE_OR_GOTO("dht", this, unwind);
- GF_VALIDATE_OR_GOTO("dht", frame->local, unwind);
- GF_VALIDATE_OR_GOTO("dht", this->private, unwind);
- GF_VALIDATE_OR_GOTO("dht", loc, unwind);
-
- conf = this->private;
- local = frame->local;
-
- call_cnt = conf->subvolume_cnt;
- local->call_cnt = call_cnt;
-
- local->layout = dht_layout_new(this, conf->subvolume_cnt);
- if (!local->layout) {
- goto unwind;
- }
-
- if (local->xattr != NULL) {
- dict_unref(local->xattr);
- local->xattr = NULL;
- }
-
- if (!gf_uuid_is_null(local->gfid)) {
- ret = dict_set_gfuuid(local->xattr_req, "gfid-req", local->gfid, true);
- if (ret)
- gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DICT_SET_FAILED,
- "%s: Failed to set dictionary value:"
- " key = gfid-req",
- local->loc.path);
- }
-
- for (i = 0; i < call_cnt; i++) {
- STACK_WIND_COOKIE(
- frame, dht_lookup_dir_cbk, conf->subvolumes[i], conf->subvolumes[i],
- conf->subvolumes[i]->fops->lookup, &local->loc, local->xattr_req);
- }
- return 0;
-unwind:
- DHT_STACK_UNWIND(lookup, frame, -1, ENOMEM, NULL, NULL, NULL, NULL);
-out:
- return 0;
-}
-
/* Code to get hashed subvol based on inode and loc
First it check if loc->parent and loc->path exist then it get
hashed subvol based on loc.
*/
-xlator_t *
-dht_inode_get_hashed_subvol(inode_t *inode, xlator_t *this, loc_t *loc)
+static gf_boolean_t
+dht_should_lookup_everywhere(xlator_t *this, dht_conf_t *conf, loc_t *loc)
{
- char *path = NULL;
- loc_t populate_loc = {
- 0,
- };
- char *name = NULL;
- xlator_t *hash_subvol = NULL;
-
- if (!inode)
- return hash_subvol;
+ dht_layout_t *parent_layout = NULL;
+ int ret = 0;
+ gf_boolean_t lookup_everywhere = _gf_true;
+
+ /* lookup-optimize supersedes lookup-unhashed settings.
+ * If it is set, do not process search_unhashed
+ * If lookup-optimize is enabled, lookup everywhere if:
+ * - this is the rebalance daemon.
+ * - loc->parent is unavailable.
+ * - parent_layout is unavailable
+ * - parent_layout->commit_hash != conf->vol_commit_hash
+ */
- if (loc && loc->parent && loc->path) {
- if (!loc->name) {
- name = strrchr(loc->path, '/');
- if (name) {
- loc->name = name + 1;
- } else {
- goto out;
+ if (conf->lookup_optimize) {
+ if (!conf->defrag && loc->parent) {
+ ret = dht_inode_ctx_layout_get(loc->parent, this, &parent_layout);
+ if (!ret && parent_layout &&
+ (parent_layout->commit_hash == conf->vol_commit_hash)) {
+ lookup_everywhere = _gf_false;
}
}
- hash_subvol = dht_subvol_get_hashed(this, loc);
goto out;
- }
-
- if (!gf_uuid_is_null(inode->gfid)) {
- populate_loc.inode = inode_ref(inode);
- populate_loc.parent = inode_parent(populate_loc.inode, NULL, NULL);
- inode_path(populate_loc.inode, NULL, &path);
-
- if (!path)
- goto out;
-
- populate_loc.path = path;
- if (!populate_loc.name && populate_loc.path) {
- name = strrchr(populate_loc.path, '/');
- if (name) {
- populate_loc.name = name + 1;
-
+ } else {
+ if (conf->search_unhashed == GF_DHT_LOOKUP_UNHASHED_AUTO) {
+ if (loc->parent) {
+ ret = dht_inode_ctx_layout_get(loc->parent, this,
+ &parent_layout);
+ if (ret || !parent_layout ||
+ (!parent_layout->search_unhashed)) {
+ lookup_everywhere = _gf_false;
+ }
} else {
- goto out;
+ lookup_everywhere = _gf_false;
}
+
+ goto out;
}
- hash_subvol = dht_subvol_get_hashed(this, &populate_loc);
}
out:
- if (populate_loc.inode)
- loc_wipe(&populate_loc);
- return hash_subvol;
+ return lookup_everywhere;
}
int
@@ -2985,7 +3048,6 @@ dht_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
loc_t *loc = NULL;
xlator_t *prev = NULL;
int ret = 0;
- dht_layout_t *parent_layout = NULL;
uint32_t vol_commit_hash = 0;
GF_VALIDATE_OR_GOTO("dht", frame, err);
@@ -3000,104 +3062,82 @@ dht_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
local = frame->local;
loc = &local->loc;
- /* This is required for handling stale linkfile deletion,
- * or any more call which happens from this 'loc'.
- */
- if (!op_ret && gf_uuid_is_null(local->gfid))
- memcpy(local->gfid, stbuf->ia_gfid, 16);
-
gf_msg_debug(this->name, op_errno,
- "fresh_lookup returned for %s with op_ret %d", loc->path,
- op_ret);
+ "%s: fresh_lookup on %s returned with op_ret %d", loc->path,
+ prev->name, op_ret);
- if (!conf->vch_forced) {
- ret = dict_get_uint32(xattr, conf->commithash_xattr_name,
- &vol_commit_hash);
- if (ret == 0) {
- conf->vol_commit_hash = vol_commit_hash;
- }
- }
-
- if (ENTRY_MISSING(op_ret, op_errno)) {
- if (1 == conf->subvolume_cnt) {
- /* No need to lookup again */
- goto out;
- }
+ if (op_ret == -1) {
+ if (ENTRY_MISSING(op_ret, op_errno)) {
+ if (1 == conf->subvolume_cnt) {
+ /* No need to lookup again */
+ goto out;
+ }
- gf_msg_debug(this->name, 0, "Entry %s missing on subvol %s", loc->path,
- prev->name);
+ gf_msg_debug(this->name, 0, "Entry %s missing on subvol %s",
+ loc->path, prev->name);
- /* lookup-optimize supersedes lookup-unhashed settings,
- * - so if it is set, do not process search_unhashed
- * - except, in the case of rebalance daemon, we want to
- * force the lookup_everywhere behavior */
- if (!conf->defrag && conf->lookup_optimize && loc->parent) {
- ret = dht_inode_ctx_layout_get(loc->parent, this, &parent_layout);
- if (ret || !parent_layout ||
- (parent_layout->commit_hash != conf->vol_commit_hash)) {
- gf_msg_debug(this->name, 0,
- "hashes don't match (ret - %d,"
- " parent_layout - %p, parent_hash - %x,"
- " vol_hash - %x), do global lookup",
- ret, parent_layout,
- (parent_layout ? parent_layout->commit_hash : -1),
- conf->vol_commit_hash);
+ if (dht_should_lookup_everywhere(this, conf, loc)) {
local->op_errno = ENOENT;
dht_lookup_everywhere(frame, this, loc);
return 0;
}
+
} else {
- if (conf->search_unhashed == GF_DHT_LOOKUP_UNHASHED_ON) {
- local->op_errno = ENOENT;
- dht_lookup_everywhere(frame, this, loc);
+ /* posix returns ENODATA if the gfid is not set but the client and
+ * server protocol layers do not send the stbuf. We need to
+ * heal this so check if this is a directory on the other subvols.
+ */
+ if ((op_errno == ENOTCONN) || (op_errno == ENODATA)) {
+ dht_lookup_directory(frame, this, &local->loc);
return 0;
}
-
- if ((conf->search_unhashed == GF_DHT_LOOKUP_UNHASHED_AUTO) &&
- (loc->parent)) {
- ret = dht_inode_ctx_layout_get(loc->parent, this,
- &parent_layout);
- if (ret || !parent_layout)
- goto out;
- if (parent_layout->search_unhashed) {
- local->op_errno = ENOENT;
- dht_lookup_everywhere(frame, this, loc);
- return 0;
- }
- }
}
+ gf_msg_debug(this->name, op_errno, "%s: Lookup on subvolume %s failed",
+ loc->path, prev->name);
+ goto out;
}
- if (op_ret == 0) {
- is_dir = check_is_dir(inode, stbuf, xattr);
- if (is_dir) {
- local->inode = inode_ref(inode);
- local->xattr = dict_ref(xattr);
+ /* Lookup succeeded - op_ret = 0 */
+
+ /* This is required for handling stale linkfile deletion,
+ * or any more call which happens from this 'loc'.
+ */
+ if (gf_uuid_is_null(local->gfid)) {
+ /*This is set from the first successful response*/
+ memcpy(local->gfid, stbuf->ia_gfid, 16);
+ }
+
+ if (!conf->vch_forced) {
+ /* Update the commit hash in conf if it is found */
+ ret = dict_get_uint32(xattr, conf->commithash_xattr_name,
+ &vol_commit_hash);
+ if (ret == 0) {
+ conf->vol_commit_hash = vol_commit_hash;
}
}
- if (is_dir || (op_ret == -1 && op_errno == ENOTCONN)) {
+ is_dir = check_is_dir(inode, stbuf, xattr);
+ if (is_dir) {
+ /* A directory is present on all subvols, send the lookup to
+ * all subvols now */
+ local->inode = inode_ref(inode);
+ local->xattr = dict_ref(xattr);
dht_lookup_directory(frame, this, &local->loc);
return 0;
}
- if (op_ret == -1) {
- gf_msg_debug(this->name, op_errno,
- "Lookup of %s for subvolume"
- " %s failed",
- loc->path, prev->name);
- goto out;
- }
-
is_linkfile = check_is_linkfile(inode, stbuf, xattr, conf->link_xattr_name);
if (!is_linkfile) {
- /* non-directory and not a linkfile */
+ /* non-directory and not a linkto file. This is a data file
+ * Update the layout to point to the cached subvol
+ */
ret = dht_layout_preset(this, prev, inode);
if (ret < 0) {
gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LAYOUT_PRESET_FAILED,
- "could not set pre-set layout for subvolume %s", prev->name);
+ "%s: could not set pre-set layout for subvolume %s",
+ loc->path, prev->name);
op_ret = -1;
op_errno = EINVAL;
goto out;
@@ -3105,22 +3145,19 @@ dht_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
goto out;
}
+ /* This is a linkto file. Get the value of the target subvol from the
+ * linkto xattr and lookup there to see if the file exists
+ */
subvol = dht_linkfile_subvol(this, inode, stbuf, xattr);
if (!subvol) {
gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_SUBVOL_INFO,
- "linkfile not having link "
- "subvol for %s",
- loc->path);
-
- gf_msg_debug(this->name, 0,
- "linkfile not having link subvolume. path=%s", loc->path);
+ "%s: No link subvol for linkto", loc->path);
dht_lookup_everywhere(frame, this, loc);
return 0;
}
- gf_msg_debug(this->name, 0,
- "Calling lookup on linkto target %s for path %s", subvol->name,
- loc->path);
+ gf_msg_debug(this->name, 0, "%s: Calling lookup on linkto target %s",
+ loc->path, subvol->name);
STACK_WIND_COOKIE(frame, dht_lookup_linkfile_cbk, subvol, subvol,
subvol->fops->lookup, &local->loc, local->xattr_req);
@@ -3147,11 +3184,11 @@ err:
return 0;
}
-/* For directories, check if acl xattrs have been requested (by the acl xlator),
- * if not, request for them. These xattrs are needed for dht dir self-heal to
- * perform proper self-healing of dirs
+/* For directories, check if acl xattrs have been requested (by the acl
+ * xlator), if not, request for them. These xattrs are needed for dht dir
+ * self-heal to perform proper self-healing of dirs
*/
-void
+static void
dht_check_and_set_acl_xattr_req(xlator_t *this, dict_t *xattr_req)
{
int ret = 0;
@@ -3182,7 +3219,7 @@ dht_check_and_set_acl_xattr_req(xlator_t *this, dict_t *xattr_req)
* the mds information : trusted.glusterfs.dht.mds
* the acl info: See above
*/
-int
+static int
dht_set_dir_xattr_req(xlator_t *this, loc_t *loc, dict_t *xattr_req)
{
int ret = -EINVAL;
@@ -3223,50 +3260,109 @@ err:
return ret;
}
-int
-dht_set_file_xattr_req(xlator_t *this, loc_t *loc, dict_t *xattr_req)
+/* If the hashed subvol is present, send the lookup to only that subvol first.
+ * If no hashed subvol, send a lookup to all subvols and proceed based on the
+ * responses.
+ */
+static int
+dht_do_fresh_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc)
{
- int ret = -EINVAL;
+ int ret = -1;
dht_conf_t *conf = NULL;
+ xlator_t *hashed_subvol = NULL;
+ dht_local_t *local = NULL;
+ int op_errno = -1;
+ int call_cnt = 0;
+ int i = 0;
conf = this->private;
if (!conf) {
+ op_errno = EINVAL;
goto err;
}
- if (!xattr_req) {
+ local = frame->local;
+ if (!local) {
+ op_errno = EINVAL;
goto err;
}
- /* Used to check whether this is a linkto file.
- */
- ret = dict_set_uint32(xattr_req, conf->link_xattr_name, 256);
- if (ret < 0) {
- gf_msg(this->name, GF_LOG_WARNING, ENOMEM, DHT_MSG_DICT_SET_FAILED,
- "Failed to set dictionary value:key = %s for "
- "path %s",
- conf->link_xattr_name, loc->path);
+ /* Since we don't know whether this is a file or a directory,
+ * request all xattrs*/
+ ret = dht_set_file_xattr_req(this, loc, local->xattr_req);
+ if (ret) {
+ op_errno = -ret;
goto err;
}
- /* This is used to make sure we don't unlink linkto files
- * which are the target of an ongoing file migration.
- */
- ret = dict_set_uint32(xattr_req, GLUSTERFS_OPEN_FD_COUNT, 4);
+ ret = dht_set_dir_xattr_req(this, loc, local->xattr_req);
if (ret) {
- gf_msg(this->name, GF_LOG_WARNING, ENOMEM, DHT_MSG_DICT_SET_FAILED,
- "Failed to set dictionary value:key = %s for "
- "path %s",
- GLUSTERFS_OPEN_FD_COUNT, loc->path);
+ op_errno = -ret;
goto err;
}
- ret = 0;
+ /* Fuse sets a random value in gfid-req. If the gfid is missing
+ * on one or more subvols, posix will set the gfid to this value,
+ * causing GFID mismatches for directories. Remove the value fuse
+ * has sent before sending the lookup.
+ */
+ ret = dict_get_gfuuid(local->xattr_req, "gfid-req", &local->gfid_req);
+ if (ret) {
+ gf_msg_debug(this->name, 0, "%s: No gfid-req available", loc->path);
+ } else {
+ dict_del(local->xattr_req, "gfid-req");
+ }
+ /* This should have been set in dht_lookup */
+ hashed_subvol = local->hashed_subvol;
+
+ if (!hashed_subvol) {
+ gf_msg_debug(this->name, 0,
+ "%s: no subvolume in layout for path, "
+ "checking on all the subvols to see if "
+ "it is a directory",
+ loc->path);
+
+ call_cnt = conf->subvolume_cnt;
+ local->call_cnt = call_cnt;
+
+ /* Allocate a layout. This will be populated and saved in
+ * the dht inode_ctx on successful lookup
+ */
+ local->layout = dht_layout_new(this, conf->subvolume_cnt);
+ if (!local->layout) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ gf_msg_debug(this->name, 0,
+ "%s: Found null hashed subvol. Calling lookup"
+ " on all nodes.",
+ loc->path);
+
+ for (i = 0; i < call_cnt; i++) {
+ STACK_WIND_COOKIE(frame, dht_lookup_dir_cbk, conf->subvolumes[i],
+ conf->subvolumes[i],
+ conf->subvolumes[i]->fops->lookup, &local->loc,
+ local->xattr_req);
+ }
+ return 0;
+ }
+
+ /* if the hashed_subvol is non-null, send the lookup there first so
+ * as to see whether we have a file or a directory */
+ gf_msg_debug(this->name, 0, "%s: Calling fresh lookup on %s", loc->path,
+ hashed_subvol->name);
+
+ STACK_WIND_COOKIE(frame, dht_lookup_cbk, hashed_subvol, hashed_subvol,
+ hashed_subvol->fops->lookup, loc, local->xattr_req);
+ return 0;
err:
- return ret;
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND(lookup, frame, -1, op_errno, NULL, NULL, NULL, NULL);
+ return 0;
}
-int
+static int
dht_do_revalidate(call_frame_t *frame, xlator_t *this, loc_t *loc)
{
xlator_t *subvol = NULL;
@@ -3341,6 +3437,11 @@ dht_do_revalidate(call_frame_t *frame, xlator_t *this, loc_t *loc)
}
local->mds_subvol = mds_subvol;
local->call_cnt = conf->subvolume_cnt;
+
+ /* local->call_cnt will change as responses are processed. Always use a
+ * local copy to loop through the STACK_WIND calls
+ */
+
call_cnt = local->call_cnt;
for (i = 0; i < call_cnt; i++) {
@@ -3374,91 +3475,11 @@ err:
return 0;
}
-int
-dht_do_fresh_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc)
-{
- int ret = -1;
- dht_conf_t *conf = NULL;
- xlator_t *hashed_subvol = NULL;
- dht_local_t *local = NULL;
- int op_errno = -1;
- int call_cnt = 0;
- int i = 0;
-
- conf = this->private;
- if (!conf) {
- op_errno = EINVAL;
- goto err;
- }
-
- local = frame->local;
- if (!local) {
- op_errno = EINVAL;
- goto err;
- }
-
- /* Since we don't know whether this is a file or a directory,
- * request all xattrs*/
- ret = dht_set_file_xattr_req(this, loc, local->xattr_req);
- if (ret) {
- op_errno = -ret;
- goto err;
- }
-
- ret = dht_set_dir_xattr_req(this, loc, local->xattr_req);
- if (ret) {
- op_errno = -ret;
- goto err;
- }
-
- /* This should have been set in dht_lookup */
- hashed_subvol = local->hashed_subvol;
-
- if (!hashed_subvol) {
- gf_msg_debug(this->name, 0,
- "%s: no subvolume in layout for path, "
- "checking on all the subvols to see if "
- "it is a directory",
- loc->path);
-
- call_cnt = conf->subvolume_cnt;
- local->call_cnt = call_cnt;
-
- local->layout = dht_layout_new(this, conf->subvolume_cnt);
- if (!local->layout) {
- op_errno = ENOMEM;
- goto err;
- }
-
- gf_msg_debug(this->name, 0,
- "%s: Found null hashed subvol. Calling lookup"
- " on all nodes.",
- loc->path);
-
- for (i = 0; i < call_cnt; i++) {
- STACK_WIND_COOKIE(frame, dht_lookup_dir_cbk, conf->subvolumes[i],
- conf->subvolumes[i],
- conf->subvolumes[i]->fops->lookup, &local->loc,
- local->xattr_req);
- }
- return 0;
- }
-
- /* if we have the hashed_subvol, send the lookup there first so
- * as to see whether we have a file or a directory */
- gf_msg_debug(this->name, 0,
- "Calling fresh lookup for %s on"
- " %s",
- loc->path, hashed_subvol->name);
-
- STACK_WIND_COOKIE(frame, dht_lookup_cbk, hashed_subvol, hashed_subvol,
- hashed_subvol->fops->lookup, loc, local->xattr_req);
- return 0;
-err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND(lookup, frame, -1, op_errno, NULL, NULL, NULL, NULL);
- return 0;
-}
+/* Depending on the input, decide if this is a:
+ * fresh-lookup: loc->name is provided but no dht inode ctx
+ * revalidation: loc->name is provided, dht inode ctx is present
+ * discover: gfid based nameless lookup.
+ */
int
dht_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr_req)
@@ -3512,6 +3533,10 @@ dht_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr_req)
/* Nameless lookup */
+ /* This is usually sent by NFS. Lookups are done based on the gfid and
+ * no name information is available. Without the name, dht cannot calculate
+ * the hash and has to send a lookup to all subvols.
+ */
if (gf_uuid_is_null(loc->pargfid) && !gf_uuid_is_null(loc->gfid) &&
!__is_root_gfid(loc->inode->gfid)) {
local->cached_subvol = NULL;
@@ -3520,6 +3545,9 @@ dht_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr_req)
}
if (loc_is_root(loc)) {
+ /* Request the DHT commit hash xattr (trusted.glusterfs.dht.commithash)
+ * set on the brick root.
+ */
ret = dict_set_uint32(local->xattr_req, conf->commithash_xattr_name,
sizeof(uint32_t));
}
@@ -3528,12 +3556,14 @@ dht_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr_req)
hashed_subvol = dht_subvol_get_hashed(this, loc);
local->hashed_subvol = hashed_subvol;
- /* The entry has been looked up before and has an inode_ctx set
- */
if (is_revalidate(loc)) {
+ /* The entry has been looked up before and has a dht inode_ctx
+ */
dht_do_revalidate(frame, this, loc);
return 0;
} else {
+ /* Entry has not been looked up before
+ */
dht_do_fresh_lookup(frame, this, loc);
return 0;
}
@@ -3545,7 +3575,7 @@ err:
return 0;
}
-int
+static int
dht_unlink_linkfile_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno, struct iatt *preparent,
struct iatt *postparent, dict_t *xdata)
@@ -3561,18 +3591,16 @@ dht_unlink_linkfile_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
if ((op_ret == -1) &&
!((op_errno == ENOENT) || (op_errno == ENOTCONN))) {
local->op_errno = op_errno;
+ UNLOCK(&frame->lock);
gf_msg_debug(this->name, op_errno,
- "Unlink link: subvolume %s"
- " returned -1",
- prev->name);
- goto unlock;
+ "Unlink link: subvolume %s returned -1", prev->name);
+ goto post_unlock;
}
local->op_ret = 0;
}
-unlock:
UNLOCK(&frame->lock);
-
+post_unlock:
dht_set_fixed_dir_stat(&local->preparent);
dht_set_fixed_dir_stat(&local->postparent);
DHT_STACK_UNWIND(unlink, frame, local->op_ret, local->op_errno,
@@ -3581,7 +3609,7 @@ unlock:
return 0;
}
-int
+static int
dht_unlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
int op_errno, struct iatt *preparent, struct iatt *postparent,
dict_t *xdata)
@@ -3602,9 +3630,10 @@ dht_unlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
} else {
local->op_ret = 0;
}
+ UNLOCK(&frame->lock);
gf_msg_debug(this->name, op_errno,
"Unlink: subvolume %s returned -1", prev->name);
- goto unlock;
+ goto post_unlock;
}
local->op_ret = 0;
@@ -3619,9 +3648,8 @@ dht_unlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
&local->postparent, 1);
}
}
-unlock:
UNLOCK(&frame->lock);
-
+post_unlock:
if (!local->op_ret) {
hashed_subvol = dht_subvol_get_hashed(this, &local->loc);
if (hashed_subvol && hashed_subvol != local->cached_subvol) {
@@ -3672,7 +3700,7 @@ dht_fix_layout_setxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
return 0;
}
-int
+static int
dht_err_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
int op_errno, dict_t *xdata)
{
@@ -3687,22 +3715,24 @@ dht_err_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
{
if (op_ret == -1) {
local->op_errno = op_errno;
+ UNLOCK(&frame->lock);
gf_msg_debug(this->name, op_errno, "subvolume %s returned -1",
prev->name);
- goto unlock;
+ goto post_unlock;
}
local->op_ret = 0;
}
-unlock:
UNLOCK(&frame->lock);
-
+post_unlock:
this_call_cnt = dht_frame_return(frame);
if (is_last_call(this_call_cnt)) {
if ((local->fop == GF_FOP_SETXATTR) ||
(local->fop == GF_FOP_FSETXATTR)) {
DHT_STACK_UNWIND(setxattr, frame, local->op_ret, local->op_errno,
NULL);
+ /* 'local' itself may not be valid after this */
+ goto out;
}
if ((local->fop == GF_FOP_REMOVEXATTR) ||
(local->fop == GF_FOP_FREMOVEXATTR)) {
@@ -3711,6 +3741,7 @@ unlock:
}
}
+out:
return 0;
}
@@ -3741,7 +3772,7 @@ dht_dict_set_array(dict_t *dict, char *key, int32_t value[], int32_t size)
return ret;
}
-int
+static int
dht_common_mds_xattrop_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, dict_t *dict,
dict_t *xdata)
@@ -3757,27 +3788,34 @@ dht_common_mds_xattrop_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
if (local->fop == GF_FOP_SETXATTR) {
DHT_STACK_UNWIND(setxattr, frame, 0, op_errno, local->xdata);
+ /* 'local' itself may not be valid after this */
+ goto out;
}
if (local->fop == GF_FOP_FSETXATTR) {
DHT_STACK_UNWIND(fsetxattr, frame, 0, op_errno, local->xdata);
+ /* 'local' itself may not be valid after this */
+ goto out;
}
if (local->fop == GF_FOP_REMOVEXATTR) {
DHT_STACK_UNWIND(removexattr, frame, 0, op_errno, NULL);
+ /* 'local' itself may not be valid after this */
+ goto out;
}
if (local->fop == GF_FOP_FREMOVEXATTR) {
DHT_STACK_UNWIND(fremovexattr, frame, 0, op_errno, NULL);
}
+out:
return 0;
}
/* Code to wind a xattrop call to add 1 on current mds internal xattr
value
*/
-int
+static int
dht_setxattr_non_mds_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno, dict_t *xdata)
{
@@ -3798,11 +3836,14 @@ dht_setxattr_non_mds_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
if (op_ret && !local->op_ret) {
local->op_ret = op_ret;
local->op_errno = op_errno;
+ UNLOCK(&frame->lock);
gf_msg_debug(this->name, op_errno, "subvolume %s returned -1",
prev->this->name);
+ goto post_unlock;
}
}
UNLOCK(&frame->lock);
+post_unlock:
this_call_cnt = dht_frame_return(frame);
if (is_last_call(this_call_cnt)) {
@@ -3834,45 +3875,60 @@ dht_setxattr_non_mds_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
} else {
if (local->fop == GF_FOP_SETXATTR) {
DHT_STACK_UNWIND(setxattr, frame, 0, 0, local->xdata);
+ /* 'local' itself may not be valid after this */
+ goto just_return;
}
if (local->fop == GF_FOP_FSETXATTR) {
DHT_STACK_UNWIND(fsetxattr, frame, 0, 0, local->xdata);
+ /* 'local' itself may not be valid after this */
+ goto just_return;
}
if (local->fop == GF_FOP_REMOVEXATTR) {
DHT_STACK_UNWIND(removexattr, frame, 0, 0, NULL);
+ /* 'local' itself may not be valid after this */
+ goto just_return;
}
if (local->fop == GF_FOP_FREMOVEXATTR) {
DHT_STACK_UNWIND(fremovexattr, frame, 0, 0, NULL);
+ /* 'local' itself may not be valid after this */
+ goto just_return;
}
}
}
out:
- if (xattrop)
- dict_unref(xattrop);
if (ret) {
if (local->fop == GF_FOP_SETXATTR) {
DHT_STACK_UNWIND(setxattr, frame, 0, 0, local->xdata);
+ /* 'local' itself may not be valid after this */
+ goto just_return;
}
if (local->fop == GF_FOP_FSETXATTR) {
DHT_STACK_UNWIND(fsetxattr, frame, 0, 0, local->xdata);
+ /* 'local' itself may not be valid after this */
+ goto just_return;
}
if (local->fop == GF_FOP_REMOVEXATTR) {
DHT_STACK_UNWIND(removexattr, frame, 0, 0, NULL);
+ /* 'local' itself may not be valid after this */
+ goto just_return;
}
if (local->fop == GF_FOP_FREMOVEXATTR) {
DHT_STACK_UNWIND(fremovexattr, frame, 0, 0, NULL);
}
}
+just_return:
+ if (xattrop)
+ dict_unref(xattrop);
return 0;
}
-int
+static int
dht_setxattr_mds_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno, dict_t *xdata)
{
@@ -3932,16 +3988,22 @@ out:
if (local->fop == GF_FOP_SETXATTR) {
DHT_STACK_UNWIND(setxattr, frame, local->op_ret, local->op_errno,
xdata);
+ /* 'local' itself may not be valid after this */
+ goto just_return;
}
if (local->fop == GF_FOP_FSETXATTR) {
DHT_STACK_UNWIND(fsetxattr, frame, local->op_ret, local->op_errno,
xdata);
+ /* 'local' itself may not be valid after this */
+ goto just_return;
}
if (local->fop == GF_FOP_REMOVEXATTR) {
DHT_STACK_UNWIND(removexattr, frame, local->op_ret, local->op_errno,
NULL);
+ /* 'local' itself may not be valid after this */
+ goto just_return;
}
if (local->fop == GF_FOP_FREMOVEXATTR) {
@@ -3949,10 +4011,11 @@ out:
NULL);
}
+just_return:
return 0;
}
-int
+static int
dht_xattrop_mds_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno, dict_t *dict, dict_t *xdata)
{
@@ -3999,16 +4062,22 @@ out:
if (local->fop == GF_FOP_SETXATTR) {
DHT_STACK_UNWIND(setxattr, frame, local->op_ret, local->op_errno,
xdata);
+ /* 'local' itself may not be valid after this */
+ goto just_return;
}
if (local->fop == GF_FOP_FSETXATTR) {
DHT_STACK_UNWIND(fsetxattr, frame, local->op_ret, local->op_errno,
xdata);
+ /* 'local' itself may not be valid after this */
+ goto just_return;
}
if (local->fop == GF_FOP_REMOVEXATTR) {
DHT_STACK_UNWIND(removexattr, frame, local->op_ret, local->op_errno,
NULL);
+ /* 'local' itself may not be valid after this */
+ goto just_return;
}
if (local->fop == GF_FOP_FREMOVEXATTR) {
@@ -4016,6 +4085,7 @@ out:
NULL);
}
+just_return:
return 0;
}
@@ -4058,7 +4128,7 @@ dht_fill_pathinfo_xattr(xlator_t *this, dht_local_t *local, char *xattr_buf,
}
}
-int
+static int
dht_vgetxattr_alloc_and_fill(dht_local_t *local, dict_t *xattr, xlator_t *this,
int op_errno)
{
@@ -4107,7 +4177,7 @@ out:
return ret;
}
-int
+static int
dht_vgetxattr_fill_and_set(dht_local_t *local, dict_t **dict, xlator_t *this,
gf_boolean_t flag)
{
@@ -4163,7 +4233,7 @@ out:
return ret;
}
-int
+static int
dht_find_local_subvol_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno, dict_t *xattr,
dict_t *xdata)
@@ -4202,11 +4272,15 @@ dht_find_local_subvol_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
{
this_call_cnt = --local->call_cnt;
if (op_ret < 0) {
- gf_msg(this->name, GF_LOG_ERROR, op_errno, DHT_MSG_GET_XATTR_FAILED,
- "getxattr err for dir");
local->op_ret = -1;
local->op_errno = op_errno;
- goto unlock;
+ UNLOCK(&frame->lock);
+ if (op_errno == ENODATA)
+ gf_msg_debug(this->name, 0, "failed to get node-uuid");
+ else
+ gf_msg(this->name, GF_LOG_ERROR, op_errno,
+ DHT_MSG_GET_XATTR_FAILED, "failed to get node-uuid");
+ goto post_unlock;
}
ret = dict_get_str(xattr, local->xsel, &uuid_list);
@@ -4227,18 +4301,19 @@ dht_find_local_subvol_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
index = conf->local_subvols_cnt;
uuid_list_copy = gf_strdup(uuid_list);
+ if (!uuid_list_copy)
+ goto unlock;
for (uuid_str = strtok_r(uuid_list, " ", &saveptr); uuid_str;
uuid_str = next_uuid_str) {
next_uuid_str = strtok_r(NULL, " ", &saveptr);
if (gf_uuid_parse(uuid_str, node_uuid)) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_UUID_PARSE_ERROR,
- "Failed to parse uuid"
- " for %s",
- prev->name);
local->op_ret = -1;
local->op_errno = EINVAL;
- goto unlock;
+ UNLOCK(&frame->lock);
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_UUID_PARSE_ERROR,
+ "Failed to parse uuid for %s", prev->name);
+ goto post_unlock;
}
count++;
@@ -4295,7 +4370,7 @@ dht_find_local_subvol_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
local->op_ret = 0;
unlock:
UNLOCK(&frame->lock);
-
+post_unlock:
if (!is_last_call(this_call_cnt))
goto out;
@@ -4317,7 +4392,7 @@ out:
return 0;
}
-int
+static int
dht_vgetxattr_dir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno, dict_t *xattr, dict_t *xdata)
{
@@ -4336,23 +4411,28 @@ dht_vgetxattr_dir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
this_call_cnt = --local->call_cnt;
if (op_ret < 0) {
if (op_errno != ENOTCONN) {
- gf_msg(this->name, GF_LOG_ERROR, op_errno,
- DHT_MSG_GET_XATTR_FAILED, "getxattr err for dir");
local->op_ret = -1;
local->op_errno = op_errno;
+ UNLOCK(&frame->lock);
+ gf_msg(this->name, GF_LOG_ERROR, op_errno,
+ DHT_MSG_GET_XATTR_FAILED, "getxattr err for dir");
+ goto post_unlock;
}
goto unlock;
}
ret = dht_vgetxattr_alloc_and_fill(local, xattr, this, op_errno);
- if (ret)
+ if (ret) {
+ UNLOCK(&frame->lock);
gf_msg(this->name, GF_LOG_ERROR, op_errno, DHT_MSG_DICT_SET_FAILED,
"alloc or fill failure");
+ goto post_unlock;
+ }
}
unlock:
UNLOCK(&frame->lock);
-
+post_unlock:
if (!is_last_call(this_call_cnt))
goto out;
@@ -4378,7 +4458,7 @@ out:
return 0;
}
-int
+static int
dht_vgetxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
int op_errno, dict_t *xattr, dict_t *xdata)
{
@@ -4424,7 +4504,7 @@ cleanup:
return 0;
}
-int
+static int
dht_linkinfo_getxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno, dict_t *xattr,
dict_t *xdata)
@@ -4446,16 +4526,16 @@ dht_linkinfo_getxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
return 0;
}
-int
+static int
dht_mds_getxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno, dict_t *xattr, dict_t *xdata)
{
dht_local_t *local = NULL;
dht_conf_t *conf = NULL;
- VALIDATE_OR_GOTO(frame, out);
- VALIDATE_OR_GOTO(frame->local, out);
- VALIDATE_OR_GOTO(this->private, out);
+ VALIDATE_OR_GOTO(frame, err);
+ VALIDATE_OR_GOTO(frame->local, err);
+ VALIDATE_OR_GOTO(this->private, err);
conf = this->private;
local = frame->local;
@@ -4464,9 +4544,7 @@ dht_mds_getxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
local->op_ret = op_ret;
goto out;
}
- if (dict_get(xattr, conf->xattr_name)) {
- dict_del(xattr, conf->xattr_name);
- }
+ dict_del(xattr, conf->xattr_name);
local->op_ret = 0;
if (!local->xattr) {
@@ -4477,6 +4555,9 @@ out:
DHT_STACK_UNWIND(getxattr, frame, local->op_ret, op_errno, local->xattr,
xdata);
return 0;
+err:
+ DHT_STACK_UNWIND(getxattr, frame, -1, EINVAL, NULL, NULL);
+ return 0;
}
int
@@ -4486,14 +4567,22 @@ dht_getxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
int this_call_cnt = 0;
dht_local_t *local = NULL;
dht_conf_t *conf = NULL;
+ int ret = 0;
- VALIDATE_OR_GOTO(frame, out);
- VALIDATE_OR_GOTO(frame->local, out);
- VALIDATE_OR_GOTO(this->private, out);
+ VALIDATE_OR_GOTO(frame, err);
+ VALIDATE_OR_GOTO(frame->local, err);
+ VALIDATE_OR_GOTO(this->private, err);
conf = this->private;
local = frame->local;
+ if (dht_check_remote_fd_failed_error(local, op_ret, op_errno)) {
+ ret = dht_check_and_open_fd_on_subvol(this, frame);
+ if (ret)
+ goto err;
+ return 0;
+ }
+
LOCK(&frame->lock);
{
if (!xattr || (op_ret == -1)) {
@@ -4501,27 +4590,10 @@ dht_getxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
goto unlock;
}
- if (dict_get(xattr, conf->xattr_name)) {
- dict_del(xattr, conf->xattr_name);
- }
-
- if (dict_get(xattr, conf->mds_xattr_key)) {
- dict_del(xattr, conf->mds_xattr_key);
- }
-
- /* filter out following two xattrs that need not
- * be visible on the mount point for geo-rep -
- * trusted.tier.fix.layout.complete and
- * trusted.tier.tier-dht.commithash
- */
-
- if (dict_get(xattr, conf->commithash_xattr_name)) {
- dict_del(xattr, conf->commithash_xattr_name);
- }
+ dict_del(xattr, conf->xattr_name);
+ dict_del(xattr, conf->mds_xattr_key);
- if (frame->root->pid >= 0 && dht_is_tier_xlator(this)) {
- dict_del(xattr, GF_XATTR_TIER_LAYOUT_FIXED_KEY);
- }
+ dict_del(xattr, conf->commithash_xattr_name);
if (frame->root->pid >= 0) {
GF_REMOVE_INTERNAL_XATTR("trusted.glusterfs.quota*", xattr);
@@ -4547,7 +4619,6 @@ unlock:
UNLOCK(&frame->lock);
this_call_cnt = dht_frame_return(frame);
-out:
if (is_last_call(this_call_cnt)) {
/* If we have a valid xattr received from any one of the
* subvolume, let's return it */
@@ -4559,9 +4630,12 @@ out:
local->xdata);
}
return 0;
+err:
+ DHT_STACK_UNWIND(getxattr, frame, -1, EINVAL, NULL, NULL);
+ return 0;
}
-int32_t
+static int32_t
dht_getxattr_unwind(call_frame_t *frame, int op_ret, int op_errno, dict_t *dict,
dict_t *xdata)
{
@@ -4569,7 +4643,7 @@ dht_getxattr_unwind(call_frame_t *frame, int op_ret, int op_errno, dict_t *dict,
return 0;
}
-int
+static int
dht_getxattr_get_real_filename_cbk(call_frame_t *frame, void *cookie,
xlator_t *this, int op_ret, int op_errno,
dict_t *xattr, dict_t *xdata)
@@ -4581,7 +4655,7 @@ dht_getxattr_get_real_filename_cbk(call_frame_t *frame, void *cookie,
LOCK(&frame->lock);
{
- if (local->op_errno == ENODATA || local->op_errno == EOPNOTSUPP) {
+ if (local->op_errno == EOPNOTSUPP) {
/* Nothing to do here, we have already found
* a subvol which does not have the get_real_filename
* optimization. If condition is for simple logic.
@@ -4590,7 +4664,7 @@ dht_getxattr_get_real_filename_cbk(call_frame_t *frame, void *cookie,
}
if (op_ret == -1) {
- if (op_errno == ENODATA || op_errno == EOPNOTSUPP) {
+ if (op_errno == EOPNOTSUPP) {
/* This subvol does not have the optimization.
* Better let the user know we don't support it.
* Remove previous results if any.
@@ -4608,16 +4682,17 @@ dht_getxattr_get_real_filename_cbk(call_frame_t *frame, void *cookie,
local->op_ret = op_ret;
local->op_errno = op_errno;
+ UNLOCK(&frame->lock);
gf_msg(this->name, GF_LOG_WARNING, op_errno,
DHT_MSG_UPGRADE_BRICKS,
"At least "
"one of the bricks does not support "
"this operation. Please upgrade all "
"bricks.");
- goto unlock;
+ goto post_unlock;
}
- if (op_errno == ENOENT) {
+ if (op_errno == ENOATTR) {
/* Do nothing, our defaults are set to this.
*/
goto unlock;
@@ -4629,9 +4704,10 @@ dht_getxattr_get_real_filename_cbk(call_frame_t *frame, void *cookie,
* down subvol and return a good result(if any)
* from other subvol.
*/
+ UNLOCK(&frame->lock);
gf_msg(this->name, GF_LOG_WARNING, op_errno,
DHT_MSG_GET_XATTR_FAILED, "Failed to get real filename.");
- goto unlock;
+ goto post_unlock;
}
/* This subvol has the required file.
@@ -4652,13 +4728,13 @@ dht_getxattr_get_real_filename_cbk(call_frame_t *frame, void *cookie,
local->op_ret = op_ret;
local->op_errno = 0;
- gf_msg_debug(this->name, 0,
- "Found a matching "
- "file.");
+ UNLOCK(&frame->lock);
+ gf_msg_debug(this->name, 0, "Found a matching file.");
+ goto post_unlock;
}
unlock:
UNLOCK(&frame->lock);
-
+post_unlock:
this_call_cnt = dht_frame_return(frame);
if (is_last_call(this_call_cnt)) {
DHT_STACK_UNWIND(getxattr, frame, local->op_ret, local->op_errno,
@@ -4668,7 +4744,7 @@ unlock:
return 0;
}
-int
+static int
dht_getxattr_get_real_filename(call_frame_t *frame, xlator_t *this, loc_t *loc,
const char *key, dict_t *xdata)
{
@@ -4684,7 +4760,7 @@ dht_getxattr_get_real_filename(call_frame_t *frame, xlator_t *this, loc_t *loc,
cnt = local->call_cnt = layout->cnt;
local->op_ret = -1;
- local->op_errno = ENOENT;
+ local->op_errno = ENOATTR;
for (i = 0; i < cnt; i++) {
subvol = layout->list[i].xlator;
@@ -4695,7 +4771,7 @@ dht_getxattr_get_real_filename(call_frame_t *frame, xlator_t *this, loc_t *loc,
return 0;
}
-int
+static int
dht_marker_populate_args(call_frame_t *frame, int type, int *gauge,
xlator_t **subvols)
{
@@ -4712,8 +4788,8 @@ dht_marker_populate_args(call_frame_t *frame, int type, int *gauge,
return layout->cnt;
}
-int
-dht_is_debug_xattr_key(char **array, char *key)
+static int
+dht_is_debug_xattr_key(const char **array, char *key)
{
int i = 0;
@@ -4727,7 +4803,7 @@ dht_is_debug_xattr_key(char **array, char *key)
/* Note we already have frame->local initialised here*/
-int
+static int
dht_handle_debug_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
const char *key)
{
@@ -4739,10 +4815,6 @@ dht_handle_debug_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
const char *name = NULL;
local = frame->local;
- if (!key) {
- op_errno = EINVAL;
- goto out;
- }
if (dht_is_debug_xattr_key(dht_dbg_vxattrs, (char *)key) == -1) {
goto out;
@@ -4796,6 +4868,60 @@ out:
return 0;
}
+/* Virtual xattr that returns 1 if all subvols are up and 0 otherwise.
+ * Geo-rep reads this virtual xattr after a fresh mount, before it
+ * starts the I/O.
+ */
+
+/* Values stored under the subvol-status virtual xattr key. */
+enum dht_vxattr_subvol {
+ DHT_VXATTR_SUBVOLS_UP = 1, /* no subvolume was found down */
+ DHT_VXATTR_SUBVOLS_DOWN = 0, /* at least one subvolume is down */
+};
+
+/* Answer a getxattr on the subvol-status virtual key without winding to
+ * any child: build a fresh dict in local->xattr holding @key mapped to
+ * DHT_VXATTR_SUBVOLS_UP or DHT_VXATTR_SUBVOLS_DOWN and unwind with it.
+ *
+ * @frame: call frame; frame->local must already be set up by the caller.
+ * @this: dht xlator; this->private supplies the subvolume status table.
+ * @key: xattr name to store the result under.
+ *
+ * Always unwinds the frame and returns 0. Failures are reported through
+ * the unwind as op_ret -1 with EINVAL (NULL key), ENOMEM (dict
+ * allocation failed) or the negated dict_set_int8() error.
+ */
+int
+dht_vgetxattr_subvol_status(call_frame_t *frame, xlator_t *this,
+ const char *key)
+{
+ dht_local_t *local = NULL;
+ int ret = -1;
+ int op_errno = ENODATA;
+ int value = DHT_VXATTR_SUBVOLS_UP;
+ int i = 0;
+ dht_conf_t *conf = NULL;
+
+ conf = this->private;
+ local = frame->local;
+
+ if (!key) {
+ op_errno = EINVAL;
+ goto out;
+ }
+ local->xattr = dict_new();
+ if (!local->xattr) {
+ op_errno = ENOMEM;
+ goto out;
+ }
+ /* A single down subvolume is enough to report DOWN; stop at the first. */
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (!conf->subvolume_status[i]) {
+ value = DHT_VXATTR_SUBVOLS_DOWN;
+ gf_msg_debug(this->name, 0, "subvol %s is down ",
+ conf->subvolumes[i]->name);
+ break;
+ }
+ }
+ ret = dict_set_int8(local->xattr, (char *)key, value);
+ if (ret < 0) {
+ /* dict_set_int8() failure is a negative errno; split it into the
+ * op_ret/op_errno pair used by the unwind below. */
+ op_errno = -ret;
+ ret = -1;
+ goto out;
+ }
+ ret = 0;
+
+out:
+ /* On the early error paths local->xattr may still be NULL here. */
+ DHT_STACK_UNWIND(getxattr, frame, ret, op_errno, local->xattr, NULL);
+ return 0;
+}
+
int
dht_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, const char *key,
dict_t *xdata)
@@ -4853,6 +4979,11 @@ dht_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, const char *key,
goto err;
}
+ if (strncmp(key, DHT_SUBVOL_STATUS_KEY, SLEN(DHT_SUBVOL_STATUS_KEY)) == 0) {
+ dht_vgetxattr_subvol_status(frame, this, key);
+ return 0;
+ }
+
/* skip over code which is irrelevant if !DHT_IS_DIR(layout) */
if (!DHT_IS_DIR(layout))
goto no_dht_is_dir;
@@ -5085,8 +5216,7 @@ dht_fgetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, const char *key,
}
}
- if (fd->inode)
- gf_uuid_unparse(fd->inode->gfid, gfid);
+ gf_uuid_unparse(fd->inode->gfid, gfid);
if ((fd->inode->ia_type == IA_IFDIR) && key &&
(strncmp(key, GF_XATTR_LOCKINFO_KEY, SLEN(GF_XATTR_LOCKINFO_KEY)) !=
@@ -5149,6 +5279,53 @@ err:
return 0;
}
+/* Second-attempt handler for (f)setxattr.
+ *
+ * @this: dht xlator (not used in the body).
+ * @subvol: subvolume to re-send the operation to; NULL is treated as an
+ * error.
+ * @frame: call frame carrying the dht_local_t with the saved request.
+ * @ret: status tested with we_are_not_migrating().
+ *
+ * If we_are_not_migrating(ret) holds, the saved op_ret/op_errno and
+ * local->rebalance.xdata are unwound unchanged. Otherwise the request is
+ * re-wound to @subvol as setxattr or fsetxattr depending on local->fop,
+ * using the xattr and flags stashed in local->rebalance.
+ * Always returns 0; results travel through the unwind/wind.
+ */
+static int
+dht_setxattr2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret)
+{
+ dht_local_t *local = NULL;
+ int op_errno = EINVAL;
+
+ if (!frame || !frame->local)
+ goto err;
+
+ local = frame->local;
+ op_errno = local->op_errno;
+
+ if (we_are_not_migrating(ret)) {
+ /* This dht xlator is not migrating the file. Unwind and
+ * pass on the original mode bits so the higher DHT layer
+ * can handle this.
+ */
+ DHT_STACK_UNWIND(setxattr, frame, local->op_ret, local->op_errno,
+ local->rebalance.xdata);
+ return 0;
+ }
+
+ if (subvol == NULL)
+ goto err;
+
+ local->call_cnt = 2; /* This is the second attempt */
+
+ /* Path-based and fd-based variants share the same callback. */
+ if (local->fop == GF_FOP_SETXATTR) {
+ STACK_WIND_COOKIE(frame, dht_file_setxattr_cbk, subvol, subvol,
+ subvol->fops->setxattr, &local->loc,
+ local->rebalance.xattr, local->rebalance.flags,
+ local->xattr_req);
+ } else {
+ STACK_WIND_COOKIE(frame, dht_file_setxattr_cbk, subvol, subvol,
+ subvol->fops->fsetxattr, local->fd,
+ local->rebalance.xattr, local->rebalance.flags,
+ local->xattr_req);
+ }
+
+ return 0;
+
+err:
+ /* NOTE(review): this label is also reached when frame itself is NULL;
+ * confirm DHT_STACK_UNWIND tolerates a NULL frame. */
+ DHT_STACK_UNWIND(setxattr, frame, (local ? local->op_ret : -1), op_errno,
+ NULL);
+ return 0;
+}
+
int
dht_file_setxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno, dict_t *xdata)
@@ -5165,8 +5342,8 @@ dht_file_setxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
local->op_errno = op_errno;
- if ((local->fop == GF_FOP_FSETXATTR) && op_ret == -1 &&
- (op_errno == EBADF) && !(local->fd_checked)) {
+ if ((local->fop == GF_FOP_FSETXATTR) &&
+ dht_check_remote_fd_failed_error(local, op_ret, op_errno)) {
ret = dht_check_and_open_fd_on_subvol(this, frame);
if (ret)
goto out;
@@ -5239,7 +5416,7 @@ dht_is_user_xattr(dict_t *this, char *key, data_t *value, void *data)
/* Common code to wind a (f)(set|remove)xattr call to set xattr on directory
*/
-int
+static int
dht_dir_common_set_remove_xattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
fd_t *fd, dict_t *xattr, int flags,
dict_t *xdata, int *op_errno)
@@ -5256,11 +5433,13 @@ dht_dir_common_set_remove_xattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
int call_cnt = 0;
dht_local_t *local = NULL;
char gfid_local[GF_UUID_BUF_SIZE] = {0};
+ char **xattrs_to_heal;
conf = this->private;
local = frame->local;
call_cnt = conf->subvolume_cnt;
local->flags = flags;
+ xattrs_to_heal = get_xattrs_to_heal();
if (!gf_uuid_is_null(local->gfid)) {
gf_uuid_unparse(local->gfid, gfid_local);
@@ -5343,9 +5522,8 @@ dht_dir_common_set_remove_xattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
} else {
gf_msg(this->name, GF_LOG_ERROR, 0,
DHT_MSG_HASHED_SUBVOL_GET_FAILED,
- "Failed to get mds subvol for path %s"
- "gfid is %s ",
- loc->path, gfid_local);
+ "%s: Failed to get mds subvol. (gfid is %s)", loc->path,
+ gfid_local);
}
(*op_errno) = ENOENT;
goto err;
@@ -5493,7 +5671,7 @@ err:
return 0;
}
-int
+static int
dht_checking_pathinfo_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno, dict_t *xattr,
dict_t *xdata)
@@ -5532,54 +5710,7 @@ out:
return 0;
}
-int
-dht_setxattr2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret)
-{
- dht_local_t *local = NULL;
- int op_errno = EINVAL;
-
- if (!frame || !frame->local)
- goto err;
-
- local = frame->local;
- op_errno = local->op_errno;
-
- if (we_are_not_migrating(ret)) {
- /* This dht xlator is not migrating the file. Unwind and
- * pass on the original mode bits so the higher DHT layer
- * can handle this.
- */
- DHT_STACK_UNWIND(setxattr, frame, local->op_ret, local->op_errno,
- local->rebalance.xdata);
- return 0;
- }
-
- if (subvol == NULL)
- goto err;
-
- local->call_cnt = 2; /* This is the second attempt */
-
- if (local->fop == GF_FOP_SETXATTR) {
- STACK_WIND_COOKIE(frame, dht_file_setxattr_cbk, subvol, subvol,
- subvol->fops->setxattr, &local->loc,
- local->rebalance.xattr, local->rebalance.flags,
- local->xattr_req);
- } else {
- STACK_WIND_COOKIE(frame, dht_file_setxattr_cbk, subvol, subvol,
- subvol->fops->fsetxattr, local->fd,
- local->rebalance.xattr, local->rebalance.flags,
- local->xattr_req);
- }
-
- return 0;
-
-err:
- DHT_STACK_UNWIND(setxattr, frame, (local ? local->op_ret : -1), op_errno,
- NULL);
- return 0;
-}
-
-int
+static int
dht_nuke_dir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *preparent,
struct iatt *postparent, dict_t *xdata)
@@ -5588,7 +5719,7 @@ dht_nuke_dir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
return 0;
}
-int
+static int
dht_nuke_dir(call_frame_t *frame, xlator_t *this, loc_t *loc, data_t *tmp)
{
if (!IA_ISDIR(loc->inode->ia_type)) {
@@ -5741,22 +5872,7 @@ dht_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr,
if (local->rebalance.target_node) {
local->flags = forced_rebalance;
- /* Flag to suggest its a tiering migration
- * The reason for this dic key-value is that
- * promotions and demotions are multithreaded
- * so the original frame from gf_defrag_start()
- * is not carried. A new frame will be created when
- * we do syncop_setxattr(). This does not have the
- * frame->root->pid of the original frame. So we pass
- * this dic key-value when we do syncop_setxattr() to do
- * data migration and set the frame->root->pid to
- * GF_CLIENT_PID_TIER_DEFRAG in dht_setxattr() just before
- * calling dht_start_rebalance_task() */
- tmp = dict_get(xattr, TIERING_MIGRATION_KEY);
- if (tmp)
- frame->root->pid = GF_CLIENT_PID_TIER_DEFRAG;
- else
- frame->root->pid = GF_CLIENT_PID_DEFRAG;
+ frame->root->pid = GF_CLIENT_PID_DEFRAG;
ret = dht_start_rebalance_task(this, frame);
if (!ret)
@@ -5875,6 +5991,50 @@ err:
return 0;
}
+/* Second-attempt handler for (f)removexattr.
+ *
+ * @this: dht xlator (not used in the body).
+ * @subvol: subvolume to re-send the operation to; NULL is treated as an
+ * error.
+ * @frame: call frame carrying the dht_local_t with the saved request.
+ * @ret: status tested with we_are_not_migrating().
+ *
+ * If we_are_not_migrating(ret) holds, the saved op_ret/op_errno and
+ * local->rebalance.xdata are unwound unchanged. Otherwise the request is
+ * re-wound to @subvol as removexattr or fremovexattr depending on
+ * local->fop, using local->key. Failures unwind with op_ret -1.
+ * Always returns 0; results travel through the unwind/wind.
+ */
+static int
+dht_removexattr2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret)
+{
+ dht_local_t *local = NULL;
+ int op_errno = EINVAL;
+
+ if (!frame || !frame->local)
+ goto err;
+
+ local = frame->local;
+ op_errno = local->op_errno;
+
+ local->call_cnt = 2; /* This is the second attempt */
+
+ if (we_are_not_migrating(ret)) {
+ /* This dht xlator is not migrating the file. Unwind and
+ * pass on the original mode bits so the higher DHT layer
+ * can handle this.
+ */
+ DHT_STACK_UNWIND(removexattr, frame, local->op_ret, local->op_errno,
+ local->rebalance.xdata);
+ return 0;
+ }
+
+ if (subvol == NULL)
+ goto err;
+
+ /* Path-based and fd-based variants share the same callback. */
+ if (local->fop == GF_FOP_REMOVEXATTR) {
+ STACK_WIND_COOKIE(frame, dht_file_removexattr_cbk, subvol, subvol,
+ subvol->fops->removexattr, &local->loc, local->key,
+ local->xattr_req);
+ } else {
+ STACK_WIND_COOKIE(frame, dht_file_removexattr_cbk, subvol, subvol,
+ subvol->fops->fremovexattr, local->fd, local->key,
+ local->xattr_req);
+ }
+
+ return 0;
+
+err:
+ /* NOTE(review): this label is also reached when frame itself is NULL;
+ * confirm DHT_STACK_UNWIND tolerates a NULL frame. */
+ DHT_STACK_UNWIND(removexattr, frame, -1, op_errno, NULL);
+ return 0;
+}
+
int
dht_file_removexattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno, dict_t *xdata)
@@ -5891,8 +6051,8 @@ dht_file_removexattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
local->op_errno = op_errno;
- if ((local->fop == GF_FOP_FREMOVEXATTR) && (op_ret == -1) &&
- (op_errno == EBADF) && !(local->fd_checked)) {
+ if ((local->fop == GF_FOP_FREMOVEXATTR) &&
+ dht_check_remote_fd_failed_error(local, op_ret, op_errno)) {
ret = dht_check_and_open_fd_on_subvol(this, frame);
if (ret)
goto out;
@@ -5952,84 +6112,6 @@ out:
}
int
-dht_removexattr2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret)
-{
- dht_local_t *local = NULL;
- int op_errno = EINVAL;
-
- if (!frame || !frame->local)
- goto err;
-
- local = frame->local;
- op_errno = local->op_errno;
-
- local->call_cnt = 2; /* This is the second attempt */
-
- if (we_are_not_migrating(ret)) {
- /* This dht xlator is not migrating the file. Unwind and
- * pass on the original mode bits so the higher DHT layer
- * can handle this.
- */
- DHT_STACK_UNWIND(removexattr, frame, local->op_ret, local->op_errno,
- local->rebalance.xdata);
- return 0;
- }
-
- if (subvol == NULL)
- goto err;
-
- if (local->fop == GF_FOP_REMOVEXATTR) {
- STACK_WIND_COOKIE(frame, dht_file_removexattr_cbk, subvol, subvol,
- subvol->fops->removexattr, &local->loc, local->key,
- local->xattr_req);
- } else {
- STACK_WIND_COOKIE(frame, dht_file_removexattr_cbk, subvol, subvol,
- subvol->fops->fremovexattr, local->fd, local->key,
- local->xattr_req);
- }
-
- return 0;
-
-err:
- DHT_STACK_UNWIND(removexattr, frame, -1, op_errno, NULL);
- return 0;
-}
-
-int
-dht_removexattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, dict_t *xdata)
-{
- dht_local_t *local = NULL;
- int this_call_cnt = 0;
- xlator_t *prev = NULL;
-
- local = frame->local;
- prev = cookie;
-
- LOCK(&frame->lock);
- {
- if (op_ret == -1) {
- local->op_errno = op_errno;
- gf_msg_debug(this->name, op_errno, "subvolume %s returned -1",
- prev->name);
- goto unlock;
- }
-
- local->op_ret = 0;
- }
-unlock:
- UNLOCK(&frame->lock);
-
- this_call_cnt = dht_frame_return(frame);
- if (is_last_call(this_call_cnt)) {
- DHT_STACK_UNWIND(removexattr, frame, local->op_ret, local->op_errno,
- NULL);
- }
-
- return 0;
-}
-
-int
dht_removexattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
const char *key, dict_t *xdata)
{
@@ -6206,16 +6288,16 @@ dht_fd_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
{
if (op_ret == -1) {
local->op_errno = op_errno;
+ UNLOCK(&frame->lock);
gf_msg_debug(this->name, op_errno, "subvolume %s returned -1",
prev->name);
- goto unlock;
+ goto post_unlock;
}
local->op_ret = 0;
}
-unlock:
UNLOCK(&frame->lock);
-
+post_unlock:
this_call_cnt = dht_frame_return(frame);
if (is_last_call(this_call_cnt))
DHT_STACK_UNWIND(open, frame, local->op_ret, local->op_errno, local->fd,
@@ -6227,7 +6309,7 @@ unlock:
/*
* dht_normalize_stats -
*/
-void
+static void
dht_normalize_stats(struct statvfs *buf, unsigned long bsize,
unsigned long frsize)
{
@@ -6246,7 +6328,7 @@ dht_normalize_stats(struct statvfs *buf, unsigned long bsize,
}
}
-int
+static int
dht_statfs_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
int op_errno, struct statvfs *statvfs, dict_t *xdata)
{
@@ -6356,9 +6438,7 @@ dht_statfs(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
int i = -1;
inode_t *inode = NULL;
inode_table_t *itable = NULL;
- uuid_t root_gfid = {
- 0,
- };
+ static uuid_t root_gfid = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1};
loc_t newloc = {
0,
};
@@ -6384,7 +6464,6 @@ dht_statfs(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
}
loc = &local->loc2;
- root_gfid[15] = 1;
inode = inode_find(itable, root_gfid);
if (!inode) {
@@ -6493,15 +6572,16 @@ err:
}
/* dht_readdirp_cbk creates a new dentry and dentry->inode is not assigned.
- This functions assigns an inode if all of the following conditions are true:
+ This functions assigns an inode if all of the following conditions are
+ true:
- * DHT has only one child. In this case the entire layout is present on this
- single child and hence we can set complete layout in inode.
- * backend has complete layout and there are no anomalies in it and from this
- information layout can be constructed and set in inode.
+ * DHT has only one child. In this case the entire layout is present on
+ this single child and hence we can set complete layout in inode.
+ * backend has complete layout and there are no anomalies in it and from
+ this information layout can be constructed and set in inode.
*/
-void
+static void
dht_populate_inode_for_dentry(xlator_t *this, xlator_t *subvol,
gf_dirent_t *entry, gf_dirent_t *orig_entry)
{
@@ -6545,9 +6625,10 @@ out:
return;
}
-/* Posix returns op_errno = ENOENT to indicate that there are no more entries
+/* Posix returns op_errno = ENOENT to indicate that there are no more
+ * entries
*/
-int
+static int
dht_readdirp_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
int op_errno, gf_dirent_t *orig_entries, dict_t *xdata)
{
@@ -6603,10 +6684,9 @@ dht_readdirp_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
layout = local->layout;
- /* We have seen crashes in while running "rm -rf" on tier volumes
- when the layout was NULL on the hot tier. This will skip the
- entries on the subvol without a layout, hence preventing the crash
- but rmdir might fail with "directory not empty" errors*/
+ /* This will skip the entries on the subvol without a layout,
+ * hence preventing the crash but rmdir might fail with
+ * "directory not empty" errors*/
if (layout == NULL)
goto done;
@@ -6624,13 +6704,12 @@ dht_readdirp_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
orig_entry->d_name, orig_entry->d_type);
if (IA_ISINVAL(orig_entry->d_stat.ia_type)) {
- /*stat failed somewhere- ignore this entry*/
- gf_msg_debug(this->name, EINVAL,
- "Invalid stat, ignoring entry "
- "%s gfid %s",
+ /*stat failed somewhere- display this entry but the data may
+ * be inaccurate.
+ */
+ gf_msg_debug(this->name, EINVAL, "Invalid stat for %s (gfid %s)",
orig_entry->d_name,
uuid_utoa(orig_entry->d_stat.ia_gfid));
- continue;
}
if (check_is_linkfile(NULL, (&orig_entry->d_stat), orig_entry->dict,
@@ -6712,6 +6791,12 @@ dht_readdirp_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
}
}
} else {
+ if (orig_entry->dict &&
+ dict_get(orig_entry->dict, conf->link_xattr_name)) {
+ /* Strip out the S and T flags set by rebalance*/
+ DHT_STRIP_PHASE1_FLAGS(&entry->d_stat);
+ }
+
if (orig_entry->inode) {
ret = dht_layout_preset(this, prev, orig_entry->inode);
if (ret)
@@ -6833,7 +6918,7 @@ unwind:
return 0;
}
-int
+static int
dht_readdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
int op_errno, gf_dirent_t *orig_entries, dict_t *xdata)
{
@@ -6956,7 +7041,7 @@ unwind:
return 0;
}
-int
+static int
dht_do_readdir(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
off_t yoff, int whichop, dict_t *dict)
{
@@ -7080,7 +7165,7 @@ dht_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
return 0;
}
-int
+static int
dht_fsyncdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
int op_errno, dict_t *xdata)
{
@@ -7093,12 +7178,10 @@ dht_fsyncdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
{
if (op_ret == -1)
local->op_errno = op_errno;
-
- if (op_ret == 0)
+ else if (op_ret == 0)
local->op_ret = 0;
}
UNLOCK(&frame->lock);
-
this_call_cnt = dht_frame_return(frame);
if (is_last_call(this_call_cnt))
DHT_STACK_UNWIND(fsyncdir, frame, local->op_ret, local->op_errno,
@@ -7212,7 +7295,7 @@ out:
return 0;
}
-int
+static int
dht_mknod_linkfile_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, inode_t *inode,
struct iatt *stbuf, struct iatt *preparent,
@@ -7263,7 +7346,7 @@ err:
return 0;
}
-int
+static int
dht_mknod_wind_to_avail_subvol(call_frame_t *frame, xlator_t *this,
xlator_t *subvol, loc_t *loc, dev_t rdev,
mode_t mode, mode_t umask, dict_t *params)
@@ -7309,7 +7392,7 @@ out:
return 0;
}
-int32_t
+static int32_t
dht_mknod_do(call_frame_t *frame)
{
dht_local_t *local = NULL;
@@ -7359,7 +7442,7 @@ err:
return 0;
}
-int32_t
+static int32_t
dht_mknod_unlock_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
@@ -7367,7 +7450,7 @@ dht_mknod_unlock_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
return 0;
}
-int32_t
+static int32_t
dht_mknod_finish(call_frame_t *frame, xlator_t *this, int op_ret,
int invoke_cbk)
{
@@ -7419,7 +7502,7 @@ done:
return 0;
}
-int32_t
+static int32_t
dht_mknod_lock_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
@@ -7448,11 +7531,15 @@ dht_mknod_lock_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
return 0;
err:
- dht_mknod_finish(frame, this, -1, 0);
+ if (local)
+ dht_mknod_finish(frame, this, -1, 0);
+ else
+ DHT_STACK_UNWIND(mknod, frame, -1, EINVAL, NULL, NULL, NULL, NULL,
+ NULL);
return 0;
}
-int32_t
+static int32_t
dht_mknod_lock(call_frame_t *frame, xlator_t *subvol)
{
dht_local_t *local = NULL;
@@ -7497,7 +7584,7 @@ err:
return -1;
}
-int
+static int
dht_refresh_parent_layout_resume(call_frame_t *frame, xlator_t *this, int ret,
int invoke_cbk)
{
@@ -7527,7 +7614,7 @@ dht_refresh_parent_layout_resume(call_frame_t *frame, xlator_t *this, int ret,
return 0;
}
-int
+static int
dht_refresh_parent_layout_done(call_frame_t *frame)
{
dht_local_t *local = NULL;
@@ -7548,7 +7635,7 @@ resume:
return 0;
}
-int
+static int
dht_handle_parent_layout_change(xlator_t *this, call_stub_t *stub)
{
call_frame_t *refresh_frame = NULL, *frame = NULL;
@@ -7583,7 +7670,7 @@ dht_handle_parent_layout_change(xlator_t *this, call_stub_t *stub)
return 0;
}
-int32_t
+static int32_t
dht_call_mkdir_stub(call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
@@ -7606,7 +7693,7 @@ dht_call_mkdir_stub(call_frame_t *frame, void *cookie, xlator_t *this,
return 0;
}
-int32_t
+static int32_t
dht_guard_parent_layout_and_namespace(xlator_t *subvol, call_stub_t *stub)
{
dht_local_t *local = NULL;
@@ -7917,7 +8004,58 @@ err:
return 0;
}
-int
+/* Completion callback that only tears down @sync_frame; @ret and @data
+ * are ignored. Presumably registered together with
+ * dht_remove_stale_linkto() - the registration is not in this hunk,
+ * verify against the caller.
+ */
+static int
+dht_remove_stale_linkto_cbk(int ret, call_frame_t *sync_frame, void *data)
+{
+ DHT_STACK_DESTROY(sync_frame);
+ return 0;
+}
+
+/* Unlink the linkto file named by local->loc on local->link_subvol.
+ *
+ * @data: the call_frame_t to operate on; its local must carry loc and
+ * link_subvol.
+ *
+ * The unlink is sent with an xdata dict filled in by
+ * dht_fill_dict_to_avoid_unlink_of_migrating_file(). Failures are only
+ * logged as warnings.
+ *
+ * Returns 0 on success or when validation / dict allocation fails (ret
+ * is still 0 on those paths), otherwise the error returned by the
+ * dict-fill helper or by syncop_unlink().
+ */
+static int
+dht_remove_stale_linkto(void *data)
+{
+ call_frame_t *frame = NULL;
+ dht_local_t *local = NULL;
+ xlator_t *this = NULL;
+ dict_t *xdata_in = NULL;
+ int ret = 0;
+
+ GF_VALIDATE_OR_GOTO("dht", data, out);
+
+ frame = data;
+ local = frame->local;
+ this = frame->this;
+ GF_VALIDATE_OR_GOTO("dht", this, out);
+ GF_VALIDATE_OR_GOTO("dht", local, out);
+ GF_VALIDATE_OR_GOTO("dht", local->link_subvol, out);
+
+ xdata_in = dict_new();
+ if (!xdata_in)
+ goto out;
+
+ ret = dht_fill_dict_to_avoid_unlink_of_migrating_file(xdata_in);
+ if (ret) {
+ /* The split literal needs its own separating space, otherwise the
+ * message reads "linktodeletion". */
+ gf_msg(this->name, GF_LOG_WARNING, -ret, 0,
+ "Failed to set keys for stale linkto "
+ "deletion on path %s",
+ local->loc.path);
+ goto out;
+ }
+
+ ret = syncop_unlink(local->link_subvol, &local->loc, xdata_in, NULL);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, -ret, 0,
+ "Removal of linkto failed"
+ " on path %s at subvol %s",
+ local->loc.path, local->link_subvol->name);
+ }
+out:
+ if (xdata_in)
+ dict_unref(xdata_in);
+ return ret;
+}
+
+static int
dht_link_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
int op_errno, inode_t *inode, struct iatt *stbuf,
struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
@@ -8033,7 +8171,7 @@ out:
return 0;
}
-int
+static int
dht_link2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret)
{
dht_local_t *local = NULL;
@@ -8089,7 +8227,7 @@ err:
return 0;
}
-int
+static int
dht_link_linkfile_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno, inode_t *inode,
struct iatt *stbuf, struct iatt *preparent,
@@ -8192,6 +8330,11 @@ dht_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
xlator_t *prev = NULL;
int ret = -1;
dht_local_t *local = NULL;
+ gf_boolean_t parent_layout_changed = _gf_false;
+ char pgfid[GF_UUID_BUF_SIZE] = {0};
+ xlator_t *subvol = NULL;
+
+ local = frame->local;
local = frame->local;
if (!local) {
@@ -8200,8 +8343,69 @@ dht_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
goto out;
}
- if (op_ret == -1)
+ if (op_ret == -1) {
+ local->op_errno = op_errno;
+ parent_layout_changed = (xdata &&
+ dict_get(xdata, GF_PREOP_CHECK_FAILED))
+ ? _gf_true
+ : _gf_false;
+
+ if (parent_layout_changed) {
+ if (local && local->lock[0].layout.parent_layout.locks) {
+ /* Returning failure as the layout could not be fixed even under
+ * the lock */
+ goto out;
+ }
+
+ gf_uuid_unparse(local->loc.parent->gfid, pgfid);
+ gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_PARENT_LAYOUT_CHANGED,
+ "create (%s/%s) (path: %s): parent layout "
+ "changed. Attempting a layout refresh and then a "
+ "retry",
+ pgfid, local->loc.name, local->loc.path);
+
+ /*
+ dht_refresh_layout needs directory info in local->loc.Hence,
+ storing the parent_loc in local->loc and storing the create
+ context in local->loc2. We will restore this information in
+ dht_creation_do.
+ */
+
+ loc_wipe(&local->loc2);
+
+ ret = loc_copy(&local->loc2, &local->loc);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_NO_MEMORY,
+ "loc_copy failed %s", local->loc.path);
+
+ goto out;
+ }
+
+ loc_wipe(&local->loc);
+
+ ret = dht_build_parent_loc(this, &local->loc, &local->loc2,
+ &op_errno);
+
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_LOC_FAILED,
+ "parent loc build failed");
+ goto out;
+ }
+
+ subvol = dht_subvol_get_hashed(this, &local->loc2);
+
+ ret = dht_create_lock(frame, subvol);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_INODE_LK_ERROR,
+ "locking parent failed");
+ goto out;
+ }
+
+ return 0;
+ }
+
goto out;
+ }
prev = cookie;
@@ -8256,7 +8460,7 @@ out:
return 0;
}
-int
+static int
dht_create_linkfile_create_cbk(call_frame_t *frame, void *cookie,
xlator_t *this, int32_t op_ret, int32_t op_errno,
inode_t *inode, struct iatt *stbuf,
@@ -8307,7 +8511,7 @@ err:
return 0;
}
-int
+static int
dht_create_wind_to_avail_subvol(call_frame_t *frame, xlator_t *this,
xlator_t *subvol, loc_t *loc, int32_t flags,
mode_t mode, mode_t umask, fd_t *fd,
@@ -8322,6 +8526,8 @@ dht_create_wind_to_avail_subvol(call_frame_t *frame, xlator_t *this,
gf_msg_debug(this->name, 0, "creating %s on %s", loc->path,
subvol->name);
+ dht_set_parent_layout_in_dict(loc, this, local);
+
STACK_WIND_COOKIE(frame, dht_create_cbk, subvol, subvol,
subvol->fops->create, loc, flags, mode, umask, fd,
params);
@@ -8330,10 +8536,6 @@ dht_create_wind_to_avail_subvol(call_frame_t *frame, xlator_t *this,
avail_subvol = dht_free_disk_available_subvol(this, subvol, local);
if (avail_subvol != subvol) {
- local->params = dict_ref(params);
- local->flags = flags;
- local->mode = mode;
- local->umask = umask;
local->cached_subvol = avail_subvol;
local->hashed_subvol = subvol;
@@ -8349,6 +8551,8 @@ dht_create_wind_to_avail_subvol(call_frame_t *frame, xlator_t *this,
gf_msg_debug(this->name, 0, "creating %s on %s", loc->path,
subvol->name);
+ dht_set_parent_layout_in_dict(loc, this, local);
+
STACK_WIND_COOKIE(frame, dht_create_cbk, subvol, subvol,
subvol->fops->create, loc, flags, mode, umask, fd,
params);
@@ -8417,7 +8621,7 @@ out:
return ret;
}
-int32_t
+static int32_t
dht_create_do(call_frame_t *frame)
{
dht_local_t *local = NULL;
@@ -8467,7 +8671,7 @@ err:
return 0;
}
-int32_t
+static int32_t
dht_create_unlock_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
@@ -8475,7 +8679,7 @@ dht_create_unlock_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
return 0;
}
-int32_t
+static int32_t
dht_create_finish(call_frame_t *frame, xlator_t *this, int op_ret,
int invoke_cbk)
{
@@ -8527,7 +8731,7 @@ done:
return 0;
}
-int32_t
+static int32_t
dht_create_lock_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
@@ -8556,7 +8760,11 @@ dht_create_lock_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
return 0;
err:
- dht_create_finish(frame, this, -1, 0);
+ if (local)
+ dht_create_finish(frame, this, -1, 0);
+ else
+ DHT_STACK_UNWIND(create, frame, -1, EINVAL, NULL, NULL, NULL, NULL,
+ NULL, NULL);
return 0;
}
@@ -8606,6 +8814,60 @@ err:
}
+/* Add the parent directory's layout to local->params.
+ *
+ * @loc: location being created; loc->parent identifies the directory
+ * whose layout is read, and loc is used to pick the hashed subvol.
+ * @this: dht xlator; this->private supplies conf->xattr_name.
+ * @local: per-call state; local->params receives the keys and must
+ * already be set.
+ *
+ * Two entries are set: GF_PREOP_PARENT_KEY -> conf->xattr_name, and
+ * conf->xattr_name -> the layout extracted for the hashed subvol.
+ *
+ * Returns the result of the last step attempted (-1 when extraction
+ * fails, a negative value when a dict set fails). Each failure is
+ * logged as a warning. The callers visible in this diff ignore the
+ * return value.
+ */
int
+dht_set_parent_layout_in_dict(loc_t *loc, xlator_t *this, dht_local_t *local)
+{
+ dht_conf_t *conf = this->private;
+ dht_layout_t *parent_layout = NULL;
+ int *parent_disk_layout = NULL;
+ xlator_t *hashed_subvol = NULL;
+ char pgfid[GF_UUID_BUF_SIZE] = {0};
+ int ret = 0;
+
+ /* pgfid is only used in the warning messages below. */
+ gf_uuid_unparse(loc->parent->gfid, pgfid);
+
+ parent_layout = dht_layout_get(this, loc->parent);
+ hashed_subvol = dht_subvol_get_hashed(this, loc);
+
+ ret = dht_disk_layout_extract_for_subvol(this, parent_layout, hashed_subvol,
+ &parent_disk_layout);
+ if (ret == -1) {
+ gf_msg(this->name, GF_LOG_WARNING, local->op_errno,
+ DHT_MSG_PARENT_LAYOUT_CHANGED,
+ "%s (%s/%s) (path: %s): "
+ "extracting in-memory layout of parent failed. ",
+ gf_fop_list[local->fop], pgfid, loc->name, loc->path);
+ goto err;
+ }
+
+ /* NOTE(review): parent_disk_layout looks like it is allocated by the
+ * extract call above; confirm who frees it if either dict set below
+ * fails, since neither error path releases it here. */
+ ret = dict_set_str_sizen(local->params, GF_PREOP_PARENT_KEY,
+ conf->xattr_name);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_WARNING, local->op_errno,
+ DHT_MSG_PARENT_LAYOUT_CHANGED,
+ "%s (%s/%s) (path: %s): "
+ "setting %s key in params dictionary failed. ",
+ gf_fop_list[local->fop], pgfid, loc->name, loc->path,
+ GF_PREOP_PARENT_KEY);
+ goto err;
+ }
+
+ /* NOTE(review): 4 * 4 is presumably the byte size of the on-disk
+ * layout (four 32-bit integers) - confirm and consider naming it. */
+ ret = dict_set_bin(local->params, conf->xattr_name, parent_disk_layout,
+ 4 * 4);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_WARNING, local->op_errno,
+ DHT_MSG_PARENT_LAYOUT_CHANGED,
+ "%s (%s/%s) (path: %s): "
+ "setting parent-layout in params dictionary failed. ",
+ gf_fop_list[local->fop], pgfid, loc->name, loc->path);
+ goto err;
+ }
+
+err:
+ /* Reached on success as well: drops the reference taken by
+ * dht_layout_get() above. */
+ dht_layout_unref(this, parent_layout);
+ return ret;
+}
+
+int
dht_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
mode_t mode, mode_t umask, fd_t *fd, dict_t *params)
{
@@ -8631,6 +8893,11 @@ dht_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
goto err;
}
+ local->params = dict_ref(params);
+ local->flags = flags;
+ local->mode = mode;
+ local->umask = umask;
+
if (dht_filter_loc_subvol_key(this, loc, &local->loc, &subvol)) {
gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_SUBVOL_INFO,
"creating %s on %s (got create on %s)", local->loc.path,
@@ -8646,10 +8913,6 @@ dht_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
if (hashed_subvol && (hashed_subvol != subvol)) {
/* Create the linkto file and then the data file */
- local->params = dict_ref(params);
- local->flags = flags;
- local->mode = mode;
- local->umask = umask;
local->cached_subvol = subvol;
local->hashed_subvol = hashed_subvol;
@@ -8662,6 +8925,9 @@ dht_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
* file as we expect a lookup everywhere if there are problems
* with the parent layout
*/
+
+ dht_set_parent_layout_in_dict(loc, this, local);
+
STACK_WIND_COOKIE(frame, dht_create_cbk, subvol, subvol,
subvol->fops->create, &local->loc, flags, mode, umask,
fd, params);
@@ -8713,11 +8979,6 @@ dht_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
goto err;
}
- local->params = dict_ref(params);
- local->flags = flags;
- local->mode = mode;
- local->umask = umask;
-
loc_wipe(&local->loc);
ret = dht_build_parent_loc(this, &local->loc, loc, &op_errno);
@@ -8755,7 +9016,7 @@ err:
return 0;
}
-int
+static int
dht_mkdir_selfheal_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
@@ -8789,7 +9050,7 @@ dht_mkdir_selfheal_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
return 0;
}
-int
+static int
dht_mkdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
int op_errno, inode_t *inode, struct iatt *stbuf,
struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
@@ -8857,13 +9118,13 @@ unlock:
return 0;
}
-int
+static int
dht_mkdir_hashed_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno, inode_t *inode,
struct iatt *stbuf, struct iatt *preparent,
struct iatt *postparent, dict_t *xdata);
-int
+static int
dht_mkdir_helper(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
mode_t umask, dict_t *params)
{
@@ -8986,7 +9247,7 @@ err:
return 0;
}
-int
+static int
dht_mkdir_hashed_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno, inode_t *inode,
struct iatt *stbuf, struct iatt *preparent,
@@ -9003,8 +9264,6 @@ dht_mkdir_hashed_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
gf_boolean_t parent_layout_changed = _gf_false;
call_stub_t *stub = NULL;
- VALIDATE_OR_GOTO(this->private, err);
-
local = frame->local;
prev = cookie;
layout = local->layout;
@@ -9119,7 +9378,7 @@ err:
return 0;
}
-int
+static int
dht_mkdir_guard_parent_layout_cbk(call_frame_t *frame, xlator_t *this,
loc_t *loc, mode_t mode, mode_t umask,
dict_t *params)
@@ -9270,7 +9529,7 @@ err:
return 0;
}
-int
+static int
dht_rmdir_selfheal_cbk(call_frame_t *heal_frame, void *cookie, xlator_t *this,
int op_ret, int op_errno, dict_t *xdata)
{
@@ -9292,7 +9551,7 @@ dht_rmdir_selfheal_cbk(call_frame_t *heal_frame, void *cookie, xlator_t *this,
return 0;
}
-int
+static int
dht_rmdir_hashed_subvol_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno, struct iatt *preparent,
struct iatt *postparent, dict_t *xdata)
@@ -9395,7 +9654,63 @@ err:
return 0;
}
-int
+static int
+dht_rmdir_unlock_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{ /* Completion callback that dht_rmdir_unlock() passes to dht_unlock_inodelk() */
+ DHT_STACK_DESTROY(frame); /* op_ret/op_errno are not inspected; presumably this is the copied lock frame */
+ return 0;
+}
+
+static int
+dht_rmdir_unlock(call_frame_t *frame, xlator_t *this)
+{
+ dht_local_t *local = NULL, *lock_local = NULL;
+ call_frame_t *lock_frame = NULL;
+ int lock_count = 0;
+ /* Release the entrylk and the parent-layout inodelks kept in local->lock[0]. */
+ local = frame->local;
+
+ /* Unlock entrylk (issued on the caller's frame) */
+ dht_unlock_entrylk_wrapper(frame, &local->lock[0].ns.directory_ns);
+
+ /* Unlock inodelk (issued on a copy of the frame, created below) */
+ lock_count = dht_lock_count(local->lock[0].ns.parent_layout.locks,
+ local->lock[0].ns.parent_layout.lk_count);
+
+ if (lock_count == 0) /* dht_lock_count() reported nothing to release */
+ goto done;
+
+ lock_frame = copy_frame(frame);
+ if (lock_frame == NULL)
+ goto done; /* the inodelk unlock is skipped on this path */
+
+ lock_local = dht_local_init(lock_frame, &local->loc, NULL,
+ lock_frame->root->op);
+ if (lock_local == NULL)
+ goto done; /* likewise skipped; lock_frame is destroyed at done */
+ /* Move the lock array from local to lock_local, then clear it in local. */
+ lock_local->lock[0].ns.parent_layout.locks = local->lock[0]
+ .ns.parent_layout.locks;
+ lock_local->lock[0]
+ .ns.parent_layout.lk_count = local->lock[0].ns.parent_layout.lk_count;
+
+ local->lock[0].ns.parent_layout.locks = NULL;
+ local->lock[0].ns.parent_layout.lk_count = 0;
+ dht_unlock_inodelk(lock_frame, lock_local->lock[0].ns.parent_layout.locks,
+ lock_local->lock[0].ns.parent_layout.lk_count,
+ dht_rmdir_unlock_cbk);
+ lock_frame = NULL; /* handed to the unlock call; must not be destroyed below */
+
+done:
+ if (lock_frame != NULL) { /* non-NULL here only when dht_local_init() failed */
+ DHT_STACK_DESTROY(lock_frame);
+ }
+
+ return 0; /* always 0; the 'this' argument is not used in this function */
+}
+
+static int
dht_rmdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
int op_errno, struct iatt *preparent, struct iatt *postparent,
dict_t *xdata)
@@ -9530,63 +9845,7 @@ err:
return 0;
}
-int
-dht_rmdir_unlock_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
-{
- DHT_STACK_DESTROY(frame);
- return 0;
-}
-
-int
-dht_rmdir_unlock(call_frame_t *frame, xlator_t *this)
-{
- dht_local_t *local = NULL, *lock_local = NULL;
- call_frame_t *lock_frame = NULL;
- int lock_count = 0;
-
- local = frame->local;
-
- /* Unlock entrylk */
- dht_unlock_entrylk_wrapper(frame, &local->lock[0].ns.directory_ns);
-
- /* Unlock inodelk */
- lock_count = dht_lock_count(local->lock[0].ns.parent_layout.locks,
- local->lock[0].ns.parent_layout.lk_count);
-
- if (lock_count == 0)
- goto done;
-
- lock_frame = copy_frame(frame);
- if (lock_frame == NULL)
- goto done;
-
- lock_local = dht_local_init(lock_frame, &local->loc, NULL,
- lock_frame->root->op);
- if (lock_local == NULL)
- goto done;
-
- lock_local->lock[0].ns.parent_layout.locks = local->lock[0]
- .ns.parent_layout.locks;
- lock_local->lock[0]
- .ns.parent_layout.lk_count = local->lock[0].ns.parent_layout.lk_count;
-
- local->lock[0].ns.parent_layout.locks = NULL;
- local->lock[0].ns.parent_layout.lk_count = 0;
- dht_unlock_inodelk(lock_frame, lock_local->lock[0].ns.parent_layout.locks,
- lock_local->lock[0].ns.parent_layout.lk_count,
- dht_rmdir_unlock_cbk);
- lock_frame = NULL;
-
-done:
- if (lock_frame != NULL) {
- DHT_STACK_DESTROY(lock_frame);
- }
-
- return 0;
-}
-
-int
+static int
dht_rmdir_lock_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
@@ -9595,8 +9854,6 @@ dht_rmdir_lock_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int i = 0;
xlator_t *hashed_subvol;
- VALIDATE_OR_GOTO(this->private, err);
-
conf = this->private;
local = frame->local;
@@ -9629,7 +9886,7 @@ err:
return 0;
}
-int
+static int
dht_rmdir_do(call_frame_t *frame, xlator_t *this)
{
dht_local_t *local = NULL;
@@ -9638,13 +9895,13 @@ dht_rmdir_do(call_frame_t *frame, xlator_t *this)
xlator_t *hashed_subvol = NULL;
char gfid[GF_UUID_BUF_SIZE] = {0};
- VALIDATE_OR_GOTO(this->private, err);
-
- conf = this->private;
+ VALIDATE_OR_GOTO(frame->local, err);
local = frame->local;
+ VALIDATE_OR_GOTO(this->private, out);
+ conf = this->private;
if (local->op_ret == -1)
- goto err;
+ goto out;
local->call_cnt = conf->subvolume_cnt;
@@ -9676,21 +9933,80 @@ dht_rmdir_do(call_frame_t *frame, xlator_t *this)
if (ret < 0) {
local->op_ret = -1;
local->op_errno = errno ? errno : EINVAL;
- goto err;
+ goto out;
}
return 0;
-err:
+out:
dht_set_fixed_dir_stat(&local->preparent);
dht_set_fixed_dir_stat(&local->postparent);
DHT_STACK_UNWIND(rmdir, frame, local->op_ret, local->op_errno,
&local->preparent, &local->postparent, NULL);
return 0;
+err:
+ DHT_STACK_UNWIND(rmdir, frame, -1, EINVAL, NULL, NULL, NULL);
+ return 0;
}
-int
+static void
+dht_rmdir_readdirp_done(call_frame_t *readdirp_frame, xlator_t *this)
+{
+ call_frame_t *main_frame = NULL;
+ dht_local_t *main_local = NULL;
+ dht_local_t *local = NULL;
+ int this_call_cnt = 0;
+ /* Report this readdirp frame's outcome to the main rmdir frame, then destroy it. */
+ local = readdirp_frame->local;
+ main_frame = local->main_frame;
+ main_local = main_frame->local;
+
+ /* At least one readdirp failed.
+ * This is a bit hit or miss - if readdirp failed on more than
+ * one subvol, we don't know which error is returned.
+ */
+ if (local->op_ret == -1) {
+ main_local->op_ret = local->op_ret;
+ main_local->op_errno = local->op_errno;
+ }
+
+ this_call_cnt = dht_frame_return(main_frame); /* presumably the calls still pending on main_frame -- confirm */
+
+ if (is_last_call(this_call_cnt))
+ dht_rmdir_do(main_frame, this); /* last outstanding call: continue in dht_rmdir_do() */
+
+ DHT_STACK_DESTROY(readdirp_frame);
+}
+
+/* Keep sending readdirp on the subvol until it returns no more entries:
+ * not all entries may fit in a single readdirp, in which case the rmdir
+ * would keep failing with ENOTEMPTY. Once local->op_ret is -1 the frame is
+ * handed to dht_rmdir_readdirp_done() instead of winding again. Returns 0.
+ */
+static int
+dht_rmdir_readdirp_do(call_frame_t *readdirp_frame, xlator_t *this)
+{
+ dht_local_t *local = NULL;
+
+ local = readdirp_frame->local;
+
+ if (local->op_ret == -1) {
+ /* there is no point doing another readdirp on this
+ * subvol . */
+ dht_rmdir_readdirp_done(readdirp_frame, this);
+ return 0;
+ }
+
+ STACK_WIND_COOKIE(readdirp_frame, dht_rmdir_readdirp_cbk,
+ local->hashed_subvol, local->hashed_subvol,
+ local->hashed_subvol->fops->readdirp, local->fd, 4096, 0,
+ local->xattr); /* 4096 and 0 are presumably the readdirp size and offset -- confirm */
+
+ return 0;
+}
+
+static int
dht_rmdir_linkfile_unlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno, struct iatt *preparent,
struct iatt *postparent, dict_t *xdata)
@@ -9734,7 +10050,7 @@ dht_rmdir_linkfile_unlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
return 0;
}
-int
+static int
dht_rmdir_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno, inode_t *inode,
struct iatt *stbuf, dict_t *xattr, struct iatt *parent)
@@ -9759,8 +10075,7 @@ dht_rmdir_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
if (op_ret != 0) {
gf_msg(this->name, GF_LOG_WARNING, op_errno, DHT_MSG_FILE_LOOKUP_FAILED,
- "lookup failed for %s on %s (type=0%o)", local->loc.path,
- src->name, stbuf->ia_type);
+ "lookup failed for %s on %s", local->loc.path, src->name);
goto err;
}
@@ -9790,7 +10105,7 @@ err:
return 0;
}
-int
+static int
dht_rmdir_cached_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno, inode_t *inode,
struct iatt *stbuf, dict_t *xattr,
@@ -9870,7 +10185,7 @@ err:
return 0;
}
-int
+static int
dht_rmdir_is_subvol_empty(call_frame_t *frame, xlator_t *this,
gf_dirent_t *entries, xlator_t *src)
{
@@ -9966,7 +10281,10 @@ dht_rmdir_is_subvol_empty(call_frame_t *frame, xlator_t *this,
lookup_local->loc.path, src->name, gfid);
subvol = dht_linkfile_subvol(this, NULL, &trav->d_stat, trav->dict);
- if (!subvol) {
+ if (!subvol || (subvol == src)) {
+ /* we need to delete the linkto file if it does not have a
+ * valid subvol or it points to itself.
+ */
gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_INVALID_LINKFILE,
"Linkfile does not have link subvolume. "
"path = %s, gfid = %s",
@@ -10028,36 +10346,7 @@ err:
* No more entries on this subvol. Proceed to the actual rmdir operation.
*/
-void
-dht_rmdir_readdirp_done(call_frame_t *readdirp_frame, xlator_t *this)
-{
- call_frame_t *main_frame = NULL;
- dht_local_t *main_local = NULL;
- dht_local_t *local = NULL;
- int this_call_cnt = 0;
-
- local = readdirp_frame->local;
- main_frame = local->main_frame;
- main_local = main_frame->local;
-
- /* At least one readdirp failed.
- * This is a bit hit or miss - if readdirp failed on more than
- * one subvol, we don't know which error is returned.
- */
- if (local->op_ret == -1) {
- main_local->op_ret = local->op_ret;
- main_local->op_errno = local->op_errno;
- }
-
- this_call_cnt = dht_frame_return(main_frame);
-
- if (is_last_call(this_call_cnt))
- dht_rmdir_do(main_frame, this);
-
- DHT_STACK_DESTROY(readdirp_frame);
-}
-
-int
+static int
dht_rmdir_readdirp_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno, gf_dirent_t *entries,
dict_t *xdata)
@@ -10107,34 +10396,7 @@ dht_rmdir_readdirp_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
return 0;
}
-/* Keep sending readdirp on the subvol until it returns no more entries
- * It is possible that not all entries will fit in a single readdirp in which
- * case the rmdir will keep failing with ENOTEMPTY
- */
-
-int
-dht_rmdir_readdirp_do(call_frame_t *readdirp_frame, xlator_t *this)
-{
- dht_local_t *local = NULL;
-
- local = readdirp_frame->local;
-
- if (local->op_ret == -1) {
- /* there is no point doing another readdirp on this
- * subvol . */
- dht_rmdir_readdirp_done(readdirp_frame, this);
- return 0;
- }
-
- STACK_WIND_COOKIE(readdirp_frame, dht_rmdir_readdirp_cbk,
- local->hashed_subvol, local->hashed_subvol,
- local->hashed_subvol->fops->readdirp, local->fd, 4096, 0,
- local->xattr);
-
- return 0;
-}
-
-int
+static int
dht_rmdir_opendir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno, fd_t *fd, dict_t *xdata)
{
@@ -10254,6 +10516,8 @@ dht_rmdir(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags,
dht_conf_t *conf = NULL;
int op_errno = -1;
int i = -1;
+ int ret = -1;
+ dict_t *xattr_req = NULL;
VALIDATE_OR_GOTO(frame, err);
VALIDATE_OR_GOTO(this, err);
@@ -10285,14 +10549,37 @@ dht_rmdir(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags,
if (flags) {
return dht_rmdir_do(frame, this);
}
+ if (xdata) {
+ xattr_req = dict_ref(xdata);
+ } else {
+ xattr_req = dict_new();
+ }
+ if (xattr_req) {
+ ret = dict_set_uint32(xattr_req, conf->link_xattr_name, 256);
+ /* If parallel-readdir is enabled, this is required
+ * to handle stale linkto files in the directory
+ * being deleted. If this fails, log an error but
+ * do not prevent the operation.
+ */
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, 0, "%s: failed to set key %s",
+ loc->path, conf->link_xattr_name);
+ }
+ } else {
+ gf_msg(this->name, GF_LOG_ERROR, 0, 0, "%s: failed to set key %s",
+ loc->path, conf->link_xattr_name);
+ }
for (i = 0; i < conf->subvolume_cnt; i++) {
STACK_WIND_COOKIE(frame, dht_rmdir_opendir_cbk, conf->subvolumes[i],
conf->subvolumes[i],
conf->subvolumes[i]->fops->opendir, loc, local->fd,
- NULL);
+ xattr_req);
}
+ if (xattr_req) {
+ dict_unref(xattr_req);
+ }
return 0;
err:
@@ -10302,7 +10589,7 @@ err:
return 0;
}
-int
+static int
dht_entrylk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, dict_t *xdata)
@@ -10362,7 +10649,7 @@ err:
return 0;
}
-int
+static int
dht_fentrylk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, dict_t *xdata)
@@ -10409,7 +10696,7 @@ err:
return 0;
}
-int32_t
+static int32_t
dht_ipc_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
int32_t op_errno, dict_t *xdata)
{
@@ -10526,23 +10813,17 @@ dht_notify(xlator_t *this, int event, void *data, ...)
int had_heard_from_all = 0;
int have_heard_from_all = 0;
- struct timeval time = {
- 0,
- };
gf_defrag_info_t *defrag = NULL;
dict_t *dict = NULL;
gf_defrag_type cmd = 0;
dict_t *output = NULL;
va_list ap;
- dht_methods_t *methods = NULL;
struct gf_upcall *up_data = NULL;
struct gf_upcall_cache_invalidation *up_ci = NULL;
conf = this->private;
GF_VALIDATE_OR_GOTO(this->name, conf, out);
- methods = &(conf->methods);
-
/* had all subvolumes reported status once till now? */
had_heard_from_all = 1;
for (i = 0; i < conf->subvolume_cnt; i++) {
@@ -10572,12 +10853,11 @@ dht_notify(xlator_t *this, int event, void *data, ...)
break;
}
- gettimeofday(&time, NULL);
LOCK(&conf->subvolume_lock);
{
conf->subvolume_status[cnt] = 1;
conf->last_event[cnt] = event;
- conf->subvol_up_time[cnt] = time.tv_sec;
+ conf->subvol_up_time[cnt] = gf_time();
}
UNLOCK(&conf->subvolume_lock);
@@ -10685,21 +10965,13 @@ dht_notify(xlator_t *this, int event, void *data, ...)
if (defrag->is_exiting)
goto unlock;
if ((cmd == GF_DEFRAG_CMD_STATUS) ||
- (cmd == GF_DEFRAG_CMD_STATUS_TIER) ||
(cmd == GF_DEFRAG_CMD_DETACH_STATUS))
gf_defrag_status_get(conf, output);
- else if (cmd == GF_DEFRAG_CMD_START_DETACH_TIER)
- gf_defrag_start_detach_tier(defrag);
else if (cmd == GF_DEFRAG_CMD_DETACH_START)
defrag->cmd = GF_DEFRAG_CMD_DETACH_START;
else if (cmd == GF_DEFRAG_CMD_STOP ||
- cmd == GF_DEFRAG_CMD_STOP_DETACH_TIER ||
cmd == GF_DEFRAG_CMD_DETACH_STOP)
gf_defrag_stop(conf, GF_DEFRAG_STATUS_STOPPED, output);
- else if (cmd == GF_DEFRAG_CMD_PAUSE_TIER)
- ret = gf_defrag_pause_tier(this, defrag);
- else if (cmd == GF_DEFRAG_CMD_RESUME_TIER)
- ret = gf_defrag_resume_tier(this, defrag);
}
unlock:
UNLOCK(&defrag->lock);
@@ -10747,6 +11019,7 @@ dht_notify(xlator_t *this, int event, void *data, ...)
}
if (!had_heard_from_all && have_heard_from_all) {
+ static int run_defrag = 0;
/* This is the first event which completes aggregation
of events from all subvolumes. If at least one subvol
had come up, propagate CHILD_UP, but only this time
@@ -10773,15 +11046,13 @@ dht_notify(xlator_t *this, int event, void *data, ...)
* thread has already started.
*/
if (conf->defrag && !run_defrag) {
- if (methods->migration_needed(this)) {
- run_defrag = 1;
- ret = gf_thread_create(&conf->defrag->th, NULL, gf_defrag_start,
- this, "dhtdg");
- if (ret) {
- GF_FREE(conf->defrag);
- conf->defrag = NULL;
- kill(getpid(), SIGTERM);
- }
+ run_defrag = 1;
+ ret = gf_thread_create(&conf->defrag->th, NULL, gf_defrag_start,
+ this, "dhtdg");
+ if (ret) {
+ GF_FREE(conf->defrag);
+ conf->defrag = NULL;
+ kill(getpid(), SIGTERM);
}
}
}
@@ -10850,8 +11121,8 @@ dht_log_new_layout_for_dir_selfheal(xlator_t *this, loc_t *loc,
len += ret;
/* Calculation of total length of the string required to calloc
- * output_string. Log includes subvolume-name, start-range, end-range and
- * err value.
+ * output_string. Log includes subvolume-name, start-range, end-range
+ * and err value.
*
* This log will help to debug cases where:
* a) Different processes set different layout of a directory.
@@ -10862,8 +11133,7 @@ dht_log_new_layout_for_dir_selfheal(xlator_t *this, loc_t *loc,
for (i = 0; i < layout->cnt; i++) {
ret = snprintf(string, sizeof(string),
"[Subvol_name: %s, Err: %d , Start: "
- "%" PRIu32 " , Stop: %" PRIu32 " , Hash: %" PRIu32
- " ], ",
+ "0x%x, Stop: 0x%x, Hash: 0x%x], ",
layout->list[i].xlator->name, layout->list[i].err,
layout->list[i].start, layout->list[i].stop,
layout->list[i].commit_hash);
@@ -10892,8 +11162,7 @@ dht_log_new_layout_for_dir_selfheal(xlator_t *this, loc_t *loc,
for (i = 0; i < layout->cnt; i++) {
ret = snprintf(output_string + off, len - off,
"[Subvol_name: %s, Err: %d , Start: "
- "%" PRIu32 " , Stop: %" PRIu32 " , Hash: %" PRIu32
- " ], ",
+ "0x%x, Stop: 0x%x, Hash: 0x%x], ",
layout->list[i].xlator->name, layout->list[i].err,
layout->list[i].start, layout->list[i].stop,
layout->list[i].commit_hash);
@@ -10928,28 +11197,6 @@ out:
return ret;
}
-int32_t
-dht_migration_needed(xlator_t *this)
-{
- gf_defrag_info_t *defrag = NULL;
- dht_conf_t *conf = NULL;
- int ret = 0;
-
- conf = this->private;
-
- GF_VALIDATE_OR_GOTO("dht", conf, out);
- GF_VALIDATE_OR_GOTO("dht", conf->defrag, out);
-
- defrag = conf->defrag;
-
- if ((defrag->cmd != GF_DEFRAG_CMD_START_TIER) &&
- (defrag->cmd != GF_DEFRAG_CMD_START_DETACH_TIER))
- ret = 1;
-
-out:
- return ret;
-}
-
/*
This function should not be called more then once during a FOP
handling path. It is valid only for for ops on files
@@ -10984,72 +11231,13 @@ dht_set_local_rebalance(xlator_t *this, dht_local_t *local, struct iatt *stbuf,
return 0;
}
-gf_boolean_t
-dht_is_tier_xlator(xlator_t *this)
-{
- if (strcmp(this->type, "cluster/tier") == 0)
- return _gf_true;
- return _gf_false;
-}
-
int32_t
dht_release(xlator_t *this, fd_t *fd)
{
return dht_fd_ctx_destroy(this, fd);
}
-int
-dht_remove_stale_linkto(void *data)
-{
- call_frame_t *frame = NULL;
- dht_local_t *local = NULL;
- xlator_t *this = NULL;
- dict_t *xdata_in = NULL;
- int ret = 0;
-
- GF_VALIDATE_OR_GOTO("dht", data, out);
-
- frame = data;
- local = frame->local;
- this = frame->this;
- GF_VALIDATE_OR_GOTO("dht", this, out);
- GF_VALIDATE_OR_GOTO("dht", local, out);
- GF_VALIDATE_OR_GOTO("dht", local->link_subvol, out);
-
- xdata_in = dict_new();
- if (!xdata_in)
- goto out;
-
- ret = dht_fill_dict_to_avoid_unlink_of_migrating_file(xdata_in);
- if (ret) {
- gf_msg(this->name, GF_LOG_WARNING, -ret, 0,
- "Failed to set keys for stale linkto"
- "deletion on path %s",
- local->loc.path);
- goto out;
- }
-
- ret = syncop_unlink(local->link_subvol, &local->loc, xdata_in, NULL);
- if (ret) {
- gf_msg(this->name, GF_LOG_WARNING, -ret, 0,
- "Removal of linkto failed"
- " on path %s at subvol %s",
- local->loc.path, local->link_subvol->name);
- }
-out:
- if (xdata_in)
- dict_unref(xdata_in);
- return ret;
-}
-
-int
-dht_remove_stale_linkto_cbk(int ret, call_frame_t *sync_frame, void *data)
-{
- DHT_STACK_DESTROY(sync_frame);
- return 0;
-}
-
-int
+static int
dht_pt_mkdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
int op_errno, inode_t *inode, struct iatt *stbuf,
struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
@@ -11135,6 +11323,8 @@ dht_pt_getxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
conf = this->private;
dict_del(xattr, conf->xattr_name);
+ dict_del(xattr, conf->mds_xattr_key);
+ dict_del(xattr, conf->commithash_xattr_name);
if (frame->root->pid >= 0) {
GF_REMOVE_INTERNAL_XATTR("trusted.glusterfs.quota*", xattr);
@@ -11180,3 +11370,45 @@ dht_pt_fgetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, const char *key,
 FIRST_CHILD(this)->fops->fgetxattr, fd, key, xdata);
 return 0;
 }
+
+/* Check whether every subvolume recorded an error in the in-memory layout
+ * of a directory inode.
+ *
+ * Returns 0 as soon as one layout entry has err == 0. If every entry has an
+ * error, returns the first entry's error. Returns EINVAL when the inode has
+ * no layout or the layout has no entries.
+ */
+int
+dht_dir_layout_error_check(xlator_t *this, inode_t *inode)
+{
+    dht_layout_t *layout = NULL;
+    int ret = EINVAL;
+    int i = 0;
+
+    layout = dht_layout_get(this, inode);
+    if (!layout) {
+        /* No layout on this inode; nothing to dereference or release. */
+        goto out;
+    }
+
+    if (layout->cnt <= 0) {
+        /* list[0] does not exist, so there is no "first error" to report. */
+        goto unref;
+    }
+
+    /* Start from the first entry's error; cleared if any entry is healthy. */
+    ret = layout->list[0].err;
+    for (i = 0; i < layout->cnt; i++) {
+        if (layout->list[i].err == 0) {
+            ret = 0;
+            break;
+        }
+    }
+
+unref:
+    /* Balance dht_layout_get() on every exit path, matching the
+     * dht_layout_unref(this, parent_layout) pairing used in this file. */
+    dht_layout_unref(this, layout);
+out:
+    return ret;
+}
diff --git a/xlators/cluster/dht/src/dht-common.h b/xlators/cluster/dht/src/dht-common.h
index 580f57e6e25..fe0dc3db34a 100644
--- a/xlators/cluster/dht/src/dht-common.h
+++ b/xlators/cluster/dht/src/dht-common.h
@@ -9,24 +9,21 @@
*/
#include <regex.h>
-#include <signal.h>
-#include <fnmatch.h>
#include "dht-mem-types.h"
#include "dht-messages.h"
-#include "call-stub.h"
+#include <glusterfs/call-stub.h>
#include "libxlator.h"
-#include "syncop.h"
-#include "refcount.h"
-#include "timer.h"
+#include <glusterfs/syncop.h>
+#include <glusterfs/refcount.h>
+#include <glusterfs/timer.h>
#include "protocol-common.h"
-#include "glusterfs-acl.h"
+#include <glusterfs/glusterfs-acl.h>
#ifndef _DHT_H
#define _DHT_H
#define GF_XATTR_FIX_LAYOUT_KEY "distribute.fix.layout"
-#define GF_XATTR_TIER_LAYOUT_FIXED_KEY "trusted.tier.fix.layout.complete"
#define GF_XATTR_FILE_MIGRATE_KEY "trusted.distribute.migrate-data"
#define DHT_MDS_STR "mds"
#define GF_DHT_LOOKUP_UNHASHED_OFF 0
@@ -38,22 +35,21 @@
#define DHT_LAYOUT_HEAL_DOMAIN "dht.layout.heal"
/* Namespace synchronization */
#define DHT_ENTRY_SYNC_DOMAIN "dht.entry.sync"
-#define TIERING_MIGRATION_KEY "tiering.migration"
#define DHT_LAYOUT_HASH_INVALID 1
#define MAX_REBAL_THREADS sysconf(_SC_NPROCESSORS_ONLN)
#define DHT_DIR_STAT_BLOCKS 8
#define DHT_DIR_STAT_SIZE 4096
+/* Virtual xattr for subvols status */
+
+#define DHT_SUBVOL_STATUS_KEY "dht.subvol.status"
+
/* Virtual xattrs for debugging */
#define DHT_DBG_HASHED_SUBVOL_PATTERN "dht.file.hashed-subvol.*"
#define DHT_DBG_HASHED_SUBVOL_KEY "dht.file.hashed-subvol."
-/* Array to hold custom xattr keys
- */
-extern char *xattrs_to_heal[];
-
/* Rebalance nodeuuid flags */
#define REBAL_NODEUUID_MINE 0x01
@@ -152,8 +148,8 @@ struct dht_rebalance_ {
dict_t *xdata;
dict_t *xattr;
dict_t *dict;
- int32_t set;
struct gf_flock flock;
+ int32_t set;
int lock_cmd;
};
@@ -176,24 +172,24 @@ typedef enum {
} dht_reaction_type_t;
struct dht_skip_linkto_unlink {
- gf_boolean_t handle_valid_link;
- int opend_fd_count;
xlator_t *hash_links_to;
uuid_t cached_gfid;
uuid_t hashed_gfid;
+ int opend_fd_count;
+ gf_boolean_t handle_valid_link;
};
typedef struct {
xlator_t *xl;
loc_t loc; /* contains/points to inode to lock on. */
- short type; /* read/write lock. */
char *domain; /* Only locks within a single domain
* contend with each other
*/
char *basename; /* Required for entrylk */
- gf_lkowner_t lk_owner;
gf_boolean_t locked;
dht_reaction_type_t do_on_failure;
+ short type; /* read/write lock. */
+ gf_lkowner_t lk_owner;
} dht_lock_t;
/* The lock structure represents inodelk. */
@@ -244,23 +240,10 @@ typedef gf_boolean_t (*dht_need_heal_t)(call_frame_t *frame,
dht_layout_t **inmem,
dht_layout_t **ondisk);
-typedef struct {
- uint64_t blocks_used;
- uint64_t pblocks_used;
- uint64_t files_used;
- uint64_t pfiles_used;
- uint64_t unhashed_blocks_used;
- uint64_t unhashed_pblocks_used;
- uint64_t unhashed_files_used;
- uint64_t unhashed_pfiles_used;
- uint64_t unhashed_fsid;
- uint64_t hashed_fsid;
-} tier_statvfs_t;
-
struct dht_local {
- int call_cnt;
loc_t loc;
loc_t loc2;
+ int call_cnt;
int op_ret;
int op_errno;
int layout_mismatch;
@@ -274,7 +257,6 @@ struct dht_local {
struct iatt preparent;
struct iatt postparent;
struct statvfs statvfs;
- tier_statvfs_t tier_statvfs;
fd_t *fd;
inode_t *inode;
dict_t *params;
@@ -290,9 +272,6 @@ struct dht_local {
xlator_t *cached_subvol;
xlator_t *hashed_subvol;
xlator_t *mds_subvol; /* This is use for dir only */
- char need_selfheal;
- char need_xattr_heal;
- char need_attrheal;
int file_count;
int dir_count;
call_frame_t *main_frame;
@@ -310,12 +289,12 @@ struct dht_local {
uint32_t overlaps_cnt;
uint32_t down;
uint32_t misc;
- uint32_t missing_cnt;
dht_selfheal_dir_cbk_t dir_cbk;
dht_selfheal_layout_t healer;
dht_need_heal_t should_heal;
- gf_boolean_t force_mkdir;
dht_layout_t *layout, *refreshed_layout;
+ uint32_t missing_cnt;
+ gf_boolean_t force_mkdir;
} selfheal;
dht_refresh_layout_unlock refresh_layout_unlock;
@@ -323,7 +302,13 @@ struct dht_local {
uint32_t uid;
uint32_t gid;
- pid_t pid;
+ pid_t pid;
+
+ glusterfs_fop_t fop;
+
+ /* need for file-info */
+ char *xattr_val;
+ char *key;
/* needed by nufa */
int32_t flags;
@@ -331,44 +316,25 @@ struct dht_local {
dev_t rdev;
mode_t umask;
- /* need for file-info */
- char *xattr_val;
- char *key;
-
/* which xattr request? */
char xsel[256];
int32_t alloc_len;
/* gfid related */
uuid_t gfid;
+ uuid_t gfid_req;
- /* flag used to make sure we need to return estale in
- {lookup,revalidate}_cbk */
- char return_estale;
- char need_lookup_everywhere;
-
- glusterfs_fop_t fop;
-
- gf_boolean_t linked;
xlator_t *link_subvol;
struct dht_rebalance_ rebalance;
xlator_t *first_up_subvol;
- gf_boolean_t quota_deem_statfs;
-
- gf_boolean_t added_link;
- gf_boolean_t is_linkfile;
-
struct dht_skip_linkto_unlink skip_unlink;
dht_dir_transaction_t lock[2], *current;
/* inodelks during filerename for backward compatibility */
dht_lock_t **rename_inodelk_backward_compatible;
- int rename_inodelk_bc_count;
-
- short lock_type;
call_stub_t *stub;
int32_t parent_disk_layout[4];
@@ -376,15 +342,30 @@ struct dht_local {
/* rename rollback */
int *ret_cache;
- /* fd open check */
- gf_boolean_t fd_checked;
+ loc_t loc2_copy;
+
+ int rename_inodelk_bc_count;
/* This is use only for directory operation */
int32_t valid;
- gf_boolean_t heal_layout;
int32_t mds_heal_fresh_lookup;
- loc_t loc2_copy;
+ short lock_type;
+ char need_selfheal;
+ char need_xattr_heal;
+ char need_attrheal;
+ /* flag used to make sure we need to return estale in
+ {lookup,revalidate}_cbk */
+ char return_estale;
+ char need_lookup_everywhere;
+ /* fd open check */
+ gf_boolean_t fd_checked;
+ gf_boolean_t linked;
+ gf_boolean_t added_link;
+ gf_boolean_t is_linkfile;
+ gf_boolean_t quota_deem_statfs;
+ gf_boolean_t heal_layout;
gf_boolean_t locked;
gf_boolean_t dont_create_linkto;
+ gf_boolean_t gfid_missing;
};
typedef struct dht_local dht_local_t;
@@ -408,14 +389,7 @@ enum gf_defrag_type {
GF_DEFRAG_CMD_STATUS = 1 + 2,
GF_DEFRAG_CMD_START_LAYOUT_FIX = 1 + 3,
GF_DEFRAG_CMD_START_FORCE = 1 + 4,
- GF_DEFRAG_CMD_START_TIER = 1 + 5,
- GF_DEFRAG_CMD_STATUS_TIER = 1 + 6,
- GF_DEFRAG_CMD_START_DETACH_TIER = 1 + 7,
- GF_DEFRAG_CMD_STOP_DETACH_TIER = 1 + 8,
- GF_DEFRAG_CMD_PAUSE_TIER = 1 + 9,
- GF_DEFRAG_CMD_RESUME_TIER = 1 + 10,
GF_DEFRAG_CMD_DETACH_STATUS = 1 + 11,
- GF_DEFRAG_CMD_STOP_TIER = 1 + 12,
GF_DEFRAG_CMD_DETACH_START = 1 + 13,
GF_DEFRAG_CMD_DETACH_COMMIT = 1 + 14,
GF_DEFRAG_CMD_DETACH_COMMIT_FORCE = 1 + 15,
@@ -466,75 +440,6 @@ struct dht_container {
int local_subvol_index;
};
-typedef enum tier_mode_ {
- TIER_MODE_NONE = 0,
- TIER_MODE_TEST,
- TIER_MODE_WM
-} tier_mode_t;
-
-typedef enum tier_pause_state_ {
- TIER_RUNNING = 0,
- TIER_REQUEST_PAUSE,
- TIER_PAUSED
-} tier_pause_state_t;
-
-/* This Structure is only used in tiering fixlayout */
-typedef struct gf_tier_fix_layout_arg {
- xlator_t *this;
- dict_t *fix_layout;
- pthread_t thread_id;
-} gf_tier_fix_layout_arg_t;
-
-typedef struct gf_tier_conf {
- int is_tier;
- int watermark_hi;
- int watermark_low;
- int watermark_last;
- unsigned long block_size;
- fsblkcnt_t blocks_total;
- fsblkcnt_t blocks_used;
- int percent_full;
- uint64_t max_migrate_bytes;
- int max_migrate_files;
- int query_limit;
- tier_mode_t mode;
- /* These flags are only used for tier-compact */
- gf_boolean_t compact_active;
- /* These 3 flags are set to true when the client changes the */
- /* compaction mode on the command line. */
- /* When they are set, the daemon will trigger compaction as */
- /* soon as possible to activate or deactivate compaction. */
- /* If in the middle of a compaction, then the switches take */
- /* effect on the next compaction, not the current one. */
- /* If the user switches it off, we want to avoid needless */
- /* compactions. */
- /* If the user switches it on, they want to compact as soon */
- /* as possible. */
- gf_boolean_t compact_mode_switched;
- gf_boolean_t compact_mode_switched_hot;
- gf_boolean_t compact_mode_switched_cold;
- int tier_max_promote_size;
- int tier_promote_frequency;
- int tier_demote_frequency;
- int tier_compact_hot_frequency;
- int tier_compact_cold_frequency;
- uint64_t st_last_promoted_size;
- uint64_t st_last_demoted_size;
- tier_pause_state_t pause_state;
- struct synctask *pause_synctask;
- gf_timer_t *pause_timer;
- pthread_mutex_t pause_mutex;
- int promote_in_progress;
- int demote_in_progress;
- /* This Structure is only used in tiering fixlayout */
- gf_tier_fix_layout_arg_t tier_fix_layout_arg;
- /* Indicates the index of the first queryfile picked
- * in the last cycle of promote or demote */
- int32_t last_promote_qfile_index;
- int32_t last_demote_qfile_index;
- char volname[GD_VOLUME_NAME_MAX + 1];
-} gf_tier_conf_t;
-
typedef struct nodeuuid_info {
char info; /* Set to 1 is this is my node's uuid*/
uuid_t uuid; /* Store the nodeuuid as well for debugging*/
@@ -554,26 +459,18 @@ struct gf_defrag_info_ {
uint64_t num_dirs_processed;
uint64_t size_processed;
gf_lock_t lock;
- int cmd;
pthread_t th;
- gf_defrag_status_t defrag_status;
struct rpc_clnt *rpc;
uint32_t connected;
uint32_t is_exiting;
pid_t pid;
+ int cmd;
inode_t *root_inode;
uuid_t node_uuid;
- struct timeval start_time;
- gf_boolean_t stats;
+ time_t start_time;
uint32_t new_commit_hash;
+ gf_defrag_status_t defrag_status;
gf_defrag_pattern_list_t *defrag_pattern;
- gf_tier_conf_t tier_conf;
-
- /*Data Tiering params for scanner*/
- uint64_t total_files_promoted;
- uint64_t total_files_demoted;
- int write_freq_threshold;
- int read_freq_threshold;
pthread_cond_t parallel_migration_cond;
pthread_mutex_t dfq_mutex;
@@ -588,18 +485,20 @@ struct gf_defrag_info_ {
/*Throttle params*/
/*stands for reconfigured thread count*/
int32_t recon_thread_count;
- /*stands for current running thread count*/
- int32_t current_thread_count;
pthread_cond_t df_wakeup_thread;
- /* lock migration flag */
- gf_boolean_t lock_migration_enabled;
-
/* backpointer to make it easier to write functions for rebalance */
xlator_t *this;
pthread_cond_t fc_wakeup_cond;
pthread_mutex_t fc_mutex;
+
+ /*stands for current running thread count*/
+ int32_t current_thread_count;
+
+ gf_boolean_t stats;
+ /* lock migration flag */
+ gf_boolean_t lock_migration_enabled;
};
typedef struct gf_defrag_info_ gf_defrag_info_t;
@@ -607,7 +506,6 @@ typedef struct gf_defrag_info_ gf_defrag_info_t;
struct dht_methods_s {
int32_t (*migration_get_dst_subvol)(xlator_t *this, dht_local_t *local);
int32_t (*migration_other)(xlator_t *this, gf_defrag_info_t *defrag);
- int32_t (*migration_needed)(xlator_t *this);
xlator_t *(*layout_search)(xlator_t *this, dht_layout_t *layout,
const char *name);
};
@@ -615,36 +513,26 @@ struct dht_methods_s {
typedef struct dht_methods_s dht_methods_t;
struct dht_conf {
- gf_lock_t subvolume_lock;
- int subvolume_cnt;
xlator_t **subvolumes;
char *subvolume_status;
int *last_event;
dht_layout_t **file_layouts;
dht_layout_t **dir_layouts;
unsigned int search_unhashed;
- gf_boolean_t lookup_optimize;
int gen;
dht_du_t *du_stats;
double min_free_disk;
double min_free_inodes;
- char disk_unit;
+ int subvolume_cnt;
int32_t refresh_interval;
- gf_boolean_t unhashed_sticky_bit;
- struct timeval last_stat_fetch;
+ gf_lock_t subvolume_lock;
+ time_t last_stat_fetch;
gf_lock_t layout_lock;
dict_t *leaf_to_subvol;
void *private; /* Can be used by wrapper xlators over
dht */
- gf_boolean_t use_readdirp;
- char vol_uuid[UUID_SIZE + 1];
- gf_boolean_t assert_no_child_down;
time_t *subvol_up_time;
- /* This is the count used as the distribute layout for a directory */
- /* Will be a global flag to control the layout spread count */
- uint32_t dir_spread_cnt;
-
/* to keep track of nodes which are decommissioned */
xlator_t **decommissioned_bricks;
int decommission_in_progress;
@@ -653,15 +541,9 @@ struct dht_conf {
/* defrag related */
gf_defrag_info_t *defrag;
- /* Request to filter directory entries in readdir request */
-
- gf_boolean_t readdir_optimize;
-
/* Support regex-based name reinterpretation. */
regex_t rsync_regex;
- gf_boolean_t rsync_regex_valid;
regex_t extra_regex;
- gf_boolean_t extra_regex_valid;
/* Support variable xattr names. */
char *xattr_name;
@@ -670,11 +552,6 @@ struct dht_conf {
char *commithash_xattr_name;
char *wild_xattr_name;
- /* Support size-weighted rebalancing (heterogeneous bricks). */
- gf_boolean_t do_weighting;
- gf_boolean_t randomize_by_gfid;
- int dthrottle;
-
dht_methods_t methods;
struct mem_pool *lock_pool;
@@ -684,24 +561,55 @@ struct dht_conf {
subvol_nodeuuids_info_t *local_nodeuuids;
int32_t local_subvols_cnt;
+ int dthrottle;
+
+ /* Hard link handle requirement for migration triggered from client*/
+ synclock_t link_lock;
+
+ /* lock migration */
+ gf_lock_t lock;
+
+ /* This is the count used as the distribute layout for a directory */
+ /* Will be a global flag to control the layout spread count */
+ uint32_t dir_spread_cnt;
+
/*
* "Commit hash" for this volume topology. Changed whenever bricks
* are added or removed.
*/
uint32_t vol_commit_hash;
- gf_boolean_t vch_forced;
- /* lock migration */
+ char vol_uuid[UUID_SIZE + 1];
+
+ char disk_unit;
gf_boolean_t lock_migration_enabled;
- gf_lock_t lock;
- /* Hard link handle requirement for migration triggered from client*/
- synclock_t link_lock;
+ gf_boolean_t vch_forced;
gf_boolean_t use_fallocate;
gf_boolean_t force_migration;
+
+ gf_boolean_t lookup_optimize;
+
+ gf_boolean_t unhashed_sticky_bit;
+
+ gf_boolean_t assert_no_child_down;
+
+ gf_boolean_t use_readdirp;
+
+ /* Request to filter directory entries in readdir request */
+ gf_boolean_t readdir_optimize;
+
+ gf_boolean_t rsync_regex_valid;
+
+ gf_boolean_t extra_regex_valid;
+
+ /* Support size-weighted rebalancing (heterogeneous bricks). */
+ gf_boolean_t do_weighting;
+
+ gf_boolean_t randomize_by_gfid;
};
typedef struct dht_conf dht_conf_t;
@@ -740,6 +648,8 @@ struct dir_dfmeta {
struct list_head **head;
struct list_head **iterator;
int *fetch_entries;
+ /* fds corresponding to local subvols only */
+ fd_t **lfd;
};
typedef struct dht_migrate_info {
@@ -815,22 +725,18 @@ typedef struct dht_fd_ctx {
dht_local_wipe(__xl, __local); \
} while (0)
-#define DHT_UPDATE_TIME(ctx_sec, ctx_nsec, new_sec, new_nsec, inode, post) \
+#define DHT_UPDATE_TIME(ctx_sec, ctx_nsec, new_sec, new_nsec, post) \
do { \
- LOCK(&inode->lock); \
- { \
- if (ctx_sec == new_sec) \
- new_nsec = max(new_nsec, ctx_nsec); \
- else if (ctx_sec > new_sec) { \
- new_sec = ctx_sec; \
- new_nsec = ctx_nsec; \
- } \
- if (post) { \
- ctx_sec = new_sec; \
- ctx_nsec = new_nsec; \
- } \
+ if (ctx_sec == new_sec) \
+ new_nsec = max(new_nsec, ctx_nsec); \
+ else if (ctx_sec > new_sec) { \
+ new_sec = ctx_sec; \
+ new_nsec = ctx_nsec; \
+ } \
+ if (post) { \
+ ctx_sec = new_sec; \
+ ctx_nsec = new_nsec; \
} \
- UNLOCK(&inode->lock); \
} while (0)
#define is_greater_time(a, an, b, bn) \
@@ -875,7 +781,6 @@ dht_layout_anomalies(xlator_t *this, loc_t *loc, dht_layout_t *layout,
int
dht_layout_dir_mismatch(xlator_t *this, dht_layout_t *layout, xlator_t *subvol,
loc_t *loc, dict_t *xattr);
-
xlator_t *
dht_linkfile_subvol(xlator_t *this, inode_t *inode, struct iatt *buf,
dict_t *xattr);
@@ -893,9 +798,6 @@ int
dht_disk_layout_extract(xlator_t *this, dht_layout_t *layout, int pos,
int32_t **disk_layout_p);
int
-dht_disk_layout_merge(xlator_t *this, dht_layout_t *layout, int pos,
- void *disk_layout_raw, int disk_layout_len);
-int
dht_disk_layout_extract_for_subvol(xlator_t *this, dht_layout_t *layout,
xlator_t *subvol, int32_t **disk_layout_p);
@@ -931,25 +833,17 @@ dht_linkfile_create(call_frame_t *frame, fop_mknod_cbk_t linkfile_cbk,
xlator_t *this, xlator_t *tovol, xlator_t *fromvol,
loc_t *loc);
int
-dht_lookup_directory(call_frame_t *frame, xlator_t *this, loc_t *loc);
-int
dht_lookup_everywhere(call_frame_t *frame, xlator_t *this, loc_t *loc);
int
dht_selfheal_directory(call_frame_t *frame, dht_selfheal_dir_cbk_t cbk,
loc_t *loc, dht_layout_t *layout);
-
-int
-dht_selfheal_directory_for_nameless_lookup(call_frame_t *frame,
- dht_selfheal_dir_cbk_t cbk,
- loc_t *loc, dht_layout_t *layout);
-
int
dht_selfheal_new_directory(call_frame_t *frame, dht_selfheal_dir_cbk_t cbk,
dht_layout_t *layout);
int
dht_selfheal_restore(call_frame_t *frame, dht_selfheal_dir_cbk_t cbk,
loc_t *loc, dht_layout_t *layout);
-int
+void
dht_layout_sort_volname(dht_layout_t *layout);
int
@@ -966,14 +860,14 @@ dht_get_du_info_for_subvol(xlator_t *this, int subvol_idx);
int
dht_layout_preset(xlator_t *this, xlator_t *subvol, inode_t *inode);
int
-dht_layout_index_for_subvol(dht_layout_t *layout, xlator_t *subvol);
-int
dht_layout_set(xlator_t *this, inode_t *inode, dht_layout_t *layout);
;
void
dht_layout_unref(xlator_t *this, dht_layout_t *layout);
dht_layout_t *
dht_layout_ref(xlator_t *this, dht_layout_t *layout);
+int
+dht_layout_index_for_subvol(dht_layout_t *layout, xlator_t *subvol);
xlator_t *
dht_first_up_subvol(xlator_t *this);
xlator_t *
@@ -1228,25 +1122,19 @@ dht_newfile_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
struct iatt *preparent, struct iatt *postparent, dict_t *xdata);
int
-gf_defrag_status_get(dht_conf_t *conf, dict_t *dict);
-
-void
-gf_defrag_set_pause_state(gf_tier_conf_t *tier_conf, tier_pause_state_t state);
-
-tier_pause_state_t
-gf_defrag_get_pause_state(gf_tier_conf_t *tier_conf);
+dht_finodelk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata);
int
-gf_defrag_pause_tier(xlator_t *this, gf_defrag_info_t *defrag);
-
-tier_pause_state_t
-gf_defrag_check_pause_tier(gf_tier_conf_t *defrag);
+dht_getxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, dict_t *xattr, dict_t *xdata);
int
-gf_defrag_resume_tier(xlator_t *this, gf_defrag_info_t *defrag);
-
+dht_common_xattrop_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict,
+ dict_t *xdata);
int
-gf_defrag_start_detach_tier(gf_defrag_info_t *defrag);
+gf_defrag_status_get(dht_conf_t *conf, dict_t *dict);
int
gf_defrag_stop(dht_conf_t *conf, gf_defrag_status_t status, dict_t *output);
@@ -1279,10 +1167,6 @@ int
dht_dir_attr_heal(void *data);
int
dht_dir_attr_heal_done(int ret, call_frame_t *sync_frame, void *data);
-int
-dht_dir_has_layout(dict_t *xattr, char *name);
-gf_boolean_t
-dht_is_subvol_in_layout(dht_layout_t *layout, xlator_t *xlator);
xlator_t *
dht_subvol_with_free_space_inodes(xlator_t *this, xlator_t *subvol,
xlator_t *ignore, dht_layout_t *layout,
@@ -1291,15 +1175,18 @@ xlator_t *
dht_subvol_maxspace_nonzeroinode(xlator_t *this, xlator_t *subvol,
dht_layout_t *layout);
int
+dht_dir_has_layout(dict_t *xattr, char *name);
+int
dht_linkfile_attr_heal(call_frame_t *frame, xlator_t *this);
-void
-dht_layout_dump(dht_layout_t *layout, const char *prefix);
int32_t
dht_priv_dump(xlator_t *this);
int32_t
dht_inodectx_dump(xlator_t *this, inode_t *inode);
+gf_boolean_t
+dht_is_subvol_in_layout(dht_layout_t *layout, xlator_t *xlator);
+
int
dht_inode_ctx_get_mig_info(xlator_t *this, inode_t *inode,
xlator_t **src_subvol, xlator_t **dst_subvol);
@@ -1313,11 +1200,6 @@ dht_subvol_status(dht_conf_t *conf, xlator_t *subvol);
void
dht_log_new_layout_for_dir_selfheal(xlator_t *this, loc_t *loc,
dht_layout_t *layout);
-int
-dht_lookup_everywhere_done(call_frame_t *frame, xlator_t *this);
-
-int
-dht_fill_dict_to_avoid_unlink_of_migrating_file(dict_t *dict);
int
dht_layout_sort(dht_layout_t *layout);
@@ -1334,9 +1216,6 @@ dht_layout_missing_dirs(dht_layout_t *layout);
int
dht_refresh_layout(call_frame_t *frame);
-gf_boolean_t
-dht_is_tier_xlator(xlator_t *this);
-
int
dht_build_parent_loc(xlator_t *this, loc_t *parent, loc_t *child,
int32_t *op_errno);
@@ -1367,22 +1246,6 @@ dht_get_lock_subvolume(xlator_t *this, struct gf_flock *lock,
int
dht_lk_inode_unref(call_frame_t *frame, int32_t op_ret);
-void
-dht_normalize_stats(struct statvfs *buf, unsigned long bsize,
- unsigned long frsize);
-
-int
-add_opt(char **optsp, const char *opt);
-
-int
-dht_aggregate_split_brain_xattr(dict_t *dst, char *key, data_t *value);
-
-int
-dht_remove_stale_linkto(void *data);
-
-int
-dht_remove_stale_linkto_cbk(int ret, call_frame_t *sync_frame, void *data);
-
int
dht_fd_ctx_set(xlator_t *this, fd_t *fd, xlator_t *subvol);
@@ -1454,9 +1317,6 @@ dht_dir_heal_xattrs(void *data);
int
dht_dir_heal_xattrs_done(int ret, call_frame_t *sync_frame, void *data);
-void
-dht_aggregate_xattr(dict_t *dst, dict_t *src);
-
int32_t
dht_dict_set_array(dict_t *dict, char *key, int32_t value[], int32_t size);
@@ -1468,25 +1328,12 @@ dht_dir_set_heal_xattr(xlator_t *this, dht_local_t *local, dict_t *dst,
dict_t *src, int *uret, int *uflag);
int
-dht_dir_xattr_heal(xlator_t *this, dht_local_t *local);
-
-int32_t
-dht_dict_get_array(dict_t *dict, char *key, int32_t value[], int32_t size,
- int *errst);
-
-xlator_t *
-dht_inode_get_hashed_subvol(inode_t *inode, xlator_t *this, loc_t *loc);
+dht_dir_xattr_heal(xlator_t *this, dht_local_t *local, int *op_errno);
int
dht_common_mark_mdsxattr(call_frame_t *frame, int *errst, int flag);
int
-dht_common_mark_mdsxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, dict_t *xdata);
-
-int
-dht_inode_ctx_mdsvol_set(inode_t *inode, xlator_t *this, xlator_t *mds_subvol);
-int
dht_inode_ctx_mdsvol_get(inode_t *inode, xlator_t *this, xlator_t **mdsvol);
int
@@ -1495,14 +1342,6 @@ dht_selfheal_dir_setattr(call_frame_t *frame, loc_t *loc, struct iatt *stbuf,
/* Abstract out the DHT-IATT-IN-DICT */
-int
-dht_request_iatt_in_xdata(xlator_t *this, dict_t *xattr_req);
-
-int
-dht_read_iatt_from_xdata(xlator_t *this, dict_t *xdata, struct iatt *stbuf);
-
-int
-is_permission_different(ia_prot_t *prot1, ia_prot_t *prot2);
void
dht_selfheal_layout_new_directory(call_frame_t *frame, loc_t *loc,
dht_layout_t *new_layout);
@@ -1523,4 +1362,23 @@ int
dht_pt_rename(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
dict_t *xdata);
+int32_t
+dht_check_remote_fd_failed_error(dht_local_t *local, int op_ret, int op_errno);
+
+int
+dht_common_xattrop_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict,
+ dict_t *xdata);
+
+int32_t
+dht_create_lock(call_frame_t *frame, xlator_t *subvol);
+
+int
+dht_set_parent_layout_in_dict(loc_t *loc, xlator_t *this, dht_local_t *local);
+
+int
+dht_dir_layout_error_check(xlator_t *this, inode_t *inode);
+
+int
+dht_inode_ctx_mdsvol_set(inode_t *inode, xlator_t *this, xlator_t *mds_subvol);
#endif /* _DHT_H */
diff --git a/xlators/cluster/dht/src/dht-diskusage.c b/xlators/cluster/dht/src/dht-diskusage.c
index 148b4f01133..c0588828fdb 100644
--- a/xlators/cluster/dht/src/dht-diskusage.c
+++ b/xlators/cluster/dht/src/dht-diskusage.c
@@ -10,14 +10,10 @@
/* TODO: add NS locking */
-#include "glusterfs.h"
-#include "xlator.h"
#include "dht-common.h"
-#include "dht-messages.h"
-#include "defaults.h"
#include <sys/time.h>
-#include "events.h"
+#include <glusterfs/events.h>
int
dht_du_info_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
@@ -155,22 +151,18 @@ dht_get_du_info(call_frame_t *frame, xlator_t *this, loc_t *loc)
dht_conf_t *conf = NULL;
call_frame_t *statfs_frame = NULL;
dht_local_t *statfs_local = NULL;
- struct timeval tv = {
- 0,
- };
loc_t tmp_loc = {
0,
};
+ time_t now;
conf = this->private;
-
- gettimeofday(&tv, NULL);
-
+ now = gf_time();
/* make it root gfid, should be enough to get the proper
info back */
tmp_loc.gfid[15] = 1;
- if (tv.tv_sec > (conf->refresh_interval + conf->last_stat_fetch.tv_sec)) {
+ if (now > (conf->refresh_interval + conf->last_stat_fetch)) {
statfs_frame = copy_frame(frame);
if (!statfs_frame) {
goto err;
@@ -202,7 +194,7 @@ dht_get_du_info(call_frame_t *frame, xlator_t *this, loc_t *loc)
statfs_local->params);
}
- conf->last_stat_fetch.tv_sec = tv.tv_sec;
+ conf->last_stat_fetch = now;
}
return 0;
err:
diff --git a/xlators/cluster/dht/src/dht-hashfn.c b/xlators/cluster/dht/src/dht-hashfn.c
index 2f15c0370cc..acda67c312a 100644
--- a/xlators/cluster/dht/src/dht-hashfn.c
+++ b/xlators/cluster/dht/src/dht-hashfn.c
@@ -8,13 +8,12 @@
cases as published by the Free Software Foundation.
*/
-#include "glusterfs.h"
-#include "xlator.h"
#include "dht-common.h"
-#include "hashfn.h"
+#include <glusterfs/hashfn.h>
-int
-dht_hash_compute_internal(int type, const char *name, uint32_t *hash_p)
+static int
+dht_hash_compute_internal(int type, const char *name, const int len,
+ uint32_t *hash_p)
{
int ret = 0;
uint32_t hash = 0;
@@ -22,7 +21,7 @@ dht_hash_compute_internal(int type, const char *name, uint32_t *hash_p)
switch (type) {
case DHT_HASH_TYPE_DM:
case DHT_HASH_TYPE_DM_USER:
- hash = gf_dm_hashfn(name, strlen(name));
+ hash = gf_dm_hashfn(name, len);
break;
default:
ret = -1;
@@ -36,7 +35,12 @@ dht_hash_compute_internal(int type, const char *name, uint32_t *hash_p)
return ret;
}
-static gf_boolean_t
+/* The function returns:
+ * 0 : if no munging took place
+ * >0 : the length (including the terminating NUL) of the newly modified
+ * string, if it was munged.
+ */
+static int
dht_munge_name(const char *original, char *modified, size_t len, regex_t *re)
{
regmatch_t matches[2] = {
@@ -54,14 +58,14 @@ dht_munge_name(const char *original, char *modified, size_t len, regex_t *re)
if (new_len < len) {
memcpy(modified, original + matches[1].rm_so, new_len);
modified[new_len] = '\0';
- return _gf_true;
+ return new_len + 1; /* +1 for the terminating NULL */
}
}
}
/* This is guaranteed safe because of how the dest was allocated. */
strcpy(modified, original);
- return _gf_false;
+ return 0;
}
int
@@ -70,36 +74,37 @@ dht_hash_compute(xlator_t *this, int type, const char *name, uint32_t *hash_p)
char *rsync_friendly_name = NULL;
dht_conf_t *priv = NULL;
size_t len = 0;
- gf_boolean_t munged = _gf_false;
+ int munged = 0;
priv = this->private;
+ if (name == NULL)
+ return -1;
+
+ len = strlen(name) + 1;
+ rsync_friendly_name = alloca(len);
+
LOCK(&priv->lock);
{
if (priv->extra_regex_valid) {
- len = strlen(name) + 1;
- rsync_friendly_name = alloca(len);
munged = dht_munge_name(name, rsync_friendly_name, len,
&priv->extra_regex);
}
if (!munged && priv->rsync_regex_valid) {
- len = strlen(name) + 1;
- rsync_friendly_name = alloca(len);
gf_msg_trace(this->name, 0, "trying regex for %s", name);
munged = dht_munge_name(name, rsync_friendly_name, len,
&priv->rsync_regex);
- if (munged) {
- gf_msg_debug(this->name, 0, "munged down to %s",
- rsync_friendly_name);
- }
}
}
UNLOCK(&priv->lock);
-
- if (!munged) {
+ if (munged) {
+ gf_msg_debug(this->name, 0, "munged down to %s", rsync_friendly_name);
+ len = munged;
+ } else {
rsync_friendly_name = (char *)name;
}
- return dht_hash_compute_internal(type, rsync_friendly_name, hash_p);
+ return dht_hash_compute_internal(type, rsync_friendly_name, len - 1,
+ hash_p);
}
diff --git a/xlators/cluster/dht/src/dht-helper.c b/xlators/cluster/dht/src/dht-helper.c
index 22ce8bc4d4c..3f2fe43d5f3 100644
--- a/xlators/cluster/dht/src/dht-helper.c
+++ b/xlators/cluster/dht/src/dht-helper.c
@@ -8,10 +8,9 @@
cases as published by the Free Software Foundation.
*/
-#include "glusterfs.h"
-#include "xlator.h"
#include "dht-common.h"
#include "dht-lock.h"
+#include "glusterfs/compat-errno.h" // for ENODATA on BSD
static void
dht_free_fd_ctx(dht_fd_ctx_t *fd_ctx)
@@ -65,8 +64,8 @@ __dht_fd_ctx_set(xlator_t *this, fd_t *fd, xlator_t *dst)
ret = __fd_ctx_set(fd, this, value);
if (ret < 0) {
- gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_FD_CTX_SET_FAILED,
- "Failed to set fd ctx in fd=0x%p", fd);
+ gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_FD_CTX_SET_FAILED,
+ "fd=0x%p", fd, NULL);
GF_REF_PUT(fd_ctx);
}
out:
@@ -94,12 +93,13 @@ dht_fd_ctx_set(xlator_t *this, fd_t *fd, xlator_t *dst)
goto unlock;
} else {
/* This would be a big problem*/
- gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_INVALID_VALUE,
- "Different dst found in the fd ctx");
-
/* Overwrite and hope for the best*/
fd_ctx->opened_on_dst = (uint64_t)(uintptr_t)dst;
- goto unlock;
+ UNLOCK(&fd->lock);
+ gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_INVALID_VALUE,
+ NULL);
+
+ goto out;
}
}
ret = __dht_fd_ctx_set(this, fd, dst);
@@ -124,13 +124,13 @@ dht_fd_ctx_get(xlator_t *this, fd_t *fd)
{
ret = __fd_ctx_get(fd, this, &tmp_val);
if ((ret < 0) || (tmp_val == 0)) {
- UNLOCK(&fd->lock);
- goto out;
+ goto unlock;
}
fd_ctx = (dht_fd_ctx_t *)(uintptr_t)tmp_val;
GF_REF_GET(fd_ctx);
}
+unlock:
UNLOCK(&fd->lock);
out:
@@ -365,10 +365,27 @@ dht_check_and_open_fd_on_subvol_complete(int ret, call_frame_t *frame,
break;
+ case GF_FOP_FXATTROP:
+ STACK_WIND(frame, dht_common_xattrop_cbk, subvol,
+ subvol->fops->fxattrop, local->fd,
+ local->rebalance.flags, local->rebalance.xattr,
+ local->xattr_req);
+ break;
+
+ case GF_FOP_FGETXATTR:
+ STACK_WIND(frame, dht_getxattr_cbk, subvol, subvol->fops->fgetxattr,
+ local->fd, local->key, NULL);
+ break;
+
+ case GF_FOP_FINODELK:
+ STACK_WIND(frame, dht_finodelk_cbk, subvol, subvol->fops->finodelk,
+ local->key, local->fd, local->rebalance.lock_cmd,
+ &local->rebalance.flock, local->xattr_req);
+ break;
default:
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_UNKNOWN_FOP,
- "Unknown FOP on fd (%p) on file %s @ %s", fd,
- uuid_utoa(fd->inode->gfid), subvol->name);
+ gf_smsg(this->name, GF_LOG_ERROR, 0, DHT_MSG_UNKNOWN_FOP, "fd=%p",
+ fd, "gfid=%s", uuid_utoa(fd->inode->gfid), "name=%s",
+ subvol->name, NULL);
break;
}
@@ -428,10 +445,22 @@ handle_err:
DHT_STACK_UNWIND(fremovexattr, frame, -1, op_errno, NULL);
break;
+ case GF_FOP_FXATTROP:
+ DHT_STACK_UNWIND(fxattrop, frame, -1, op_errno, NULL, NULL);
+ break;
+
+ case GF_FOP_FGETXATTR:
+ DHT_STACK_UNWIND(fgetxattr, frame, -1, op_errno, NULL, NULL);
+ break;
+
+ case GF_FOP_FINODELK:
+ DHT_STACK_UNWIND(finodelk, frame, -1, op_errno, NULL);
+ break;
+
default:
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_UNKNOWN_FOP,
- "Unknown FOP on fd (%p) on file %s @ %s", fd,
- uuid_utoa(fd->inode->gfid), subvol->name);
+ gf_smsg(this->name, GF_LOG_ERROR, 0, DHT_MSG_UNKNOWN_FOP, "fd=%p",
+ fd, "gfid=%s", uuid_utoa(fd->inode->gfid), "name=%s",
+ subvol->name, NULL);
break;
}
@@ -484,10 +513,9 @@ dht_check_and_open_fd_on_subvol_task(void *data)
fd, NULL, NULL);
if (ret < 0) {
- gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_OPEN_FD_ON_DST_FAILED,
- "Failed to open the fd"
- " (%p, flags=0%o) on file %s @ %s",
- fd, fd->flags, uuid_utoa(fd->inode->gfid), subvol->name);
+ gf_smsg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_OPEN_FD_ON_DST_FAILED,
+ "fd=%p", fd, "flags=0%o", fd->flags, "gfid=%s",
+ uuid_utoa(fd->inode->gfid), "name=%s", subvol->name, NULL);
/* This can happen if the cached subvol was updated in the
* inode_ctx and the fd was opened on the new cached suvol
* after this fop was wound on the old cached subvol.
@@ -533,10 +561,8 @@ dht_check_and_open_fd_on_subvol(xlator_t *this, call_frame_t *frame)
dht_check_and_open_fd_on_subvol_complete, frame, frame);
if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, 0,
- "Failed to create synctask"
- " to check and open fd=%p",
- local->fd);
+ gf_smsg(this->name, GF_LOG_ERROR, 0, DHT_MSG_SYNCTASK_CREATE_FAILED,
+ "to-check-and-open fd=%p", local->fd, NULL);
}
return ret;
@@ -645,9 +671,7 @@ dht_get_subvol_from_id(xlator_t *this, int client_id)
ret = gf_asprintf(&sid, "%d", client_id);
if (ret == -1) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_ASPRINTF_FAILED,
- "asprintf failed while "
- "fetching subvol from the id");
+ gf_smsg(this->name, GF_LOG_ERROR, 0, DHT_MSG_ASPRINTF_FAILED, NULL);
goto out;
}
@@ -1260,6 +1284,7 @@ dht_migration_complete_check_task(void *data)
fd_t *tmp = NULL;
uint64_t tmp_miginfo = 0;
dht_migrate_info_t *miginfo = NULL;
+ gf_boolean_t skip_open = _gf_false;
int open_failed = 0;
this = THIS;
@@ -1306,9 +1331,9 @@ dht_migration_complete_check_task(void *data)
* migrated by two different layers. Raise
* a warning here.
*/
- gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_HAS_MIGINFO,
- "%s: Found miginfo in the inode ctx",
- tmp_loc.path ? tmp_loc.path : uuid_utoa(tmp_loc.gfid));
+ gf_smsg(
+ this->name, GF_LOG_WARNING, 0, DHT_MSG_HAS_MIGINFO, "tmp=%s",
+ tmp_loc.path ? tmp_loc.path : uuid_utoa(tmp_loc.gfid), NULL);
miginfo = (void *)(uintptr_t)tmp_miginfo;
GF_REF_PUT(miginfo);
@@ -1329,10 +1354,9 @@ dht_migration_complete_check_task(void *data)
ret = syncop_lookup(this, &tmp_loc, &stbuf, 0, 0, 0);
if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_FILE_LOOKUP_FAILED,
- "%s: failed to lookup the file on %s",
- tmp_loc.path ? tmp_loc.path : uuid_utoa(tmp_loc.gfid),
- this->name);
+ gf_smsg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_FILE_LOOKUP_FAILED,
+ "tmp=%s", tmp_loc.path ? tmp_loc.path : uuid_utoa(tmp_loc.gfid),
+ "name=%s", this->name, NULL);
local->op_errno = -ret;
ret = -1;
goto out;
@@ -1340,18 +1364,15 @@ dht_migration_complete_check_task(void *data)
dst_node = dht_subvol_get_cached(this, tmp_loc.inode);
if (linkto_target && dst_node != linkto_target) {
- gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_INVALID_LINKFILE,
- "linkto target (%s) is "
- "different from cached-subvol (%s). Treating %s as "
- "destination subvol",
- linkto_target->name, dst_node->name, dst_node->name);
+ gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_INVALID_LINKFILE,
+ "linkto_target_name=%s", linkto_target->name, "dst_name=%s",
+ dst_node->name, NULL);
}
if (gf_uuid_compare(stbuf.ia_gfid, tmp_loc.inode->gfid)) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_GFID_MISMATCH,
- "%s: gfid different on the target file on %s",
- tmp_loc.path ? tmp_loc.path : uuid_utoa(tmp_loc.gfid),
- dst_node->name);
+ gf_smsg(this->name, GF_LOG_ERROR, 0, DHT_MSG_GFID_MISMATCH, "tmp=%s",
+ tmp_loc.path ? tmp_loc.path : uuid_utoa(tmp_loc.gfid),
+ "dst_name=%s", dst_node->name, NULL);
ret = -1;
local->op_errno = EIO;
goto out;
@@ -1398,24 +1419,34 @@ dht_migration_complete_check_task(void *data)
* the loop will cause the destruction of the fd. So we need to
* iterate the list safely because iter_fd cannot be trusted.
*/
- list_for_each_entry_safe(iter_fd, tmp, &inode->fd_list, inode_list)
- {
- if (fd_is_anonymous(iter_fd))
- continue;
-
- if (dht_fd_open_on_dst(this, iter_fd, dst_node))
- continue;
-
+ iter_fd = list_entry((&inode->fd_list)->next, typeof(*iter_fd), inode_list);
+ while (&iter_fd->inode_list != (&inode->fd_list)) {
+ if (fd_is_anonymous(iter_fd) ||
+ (dht_fd_open_on_dst(this, iter_fd, dst_node))) {
+ if (!tmp) {
+ iter_fd = list_entry(iter_fd->inode_list.next, typeof(*iter_fd),
+ inode_list);
+ continue;
+ }
+ skip_open = _gf_true;
+ }
/* We need to release the inode->lock before calling
* syncop_open() to avoid possible deadlocks. However this
* can cause the iter_fd to be released by other threads.
* To avoid this, we take a reference before releasing the
* lock.
*/
- __fd_ref(iter_fd);
+ fd_ref(iter_fd);
UNLOCK(&inode->lock);
+ if (tmp) {
+ fd_unref(tmp);
+ tmp = NULL;
+ }
+ if (skip_open)
+ goto next;
+
/* flags for open are stripped down to allow following the
* new location of the file, otherwise we can get EEXIST or
* truncate the file again as rebalance is moving the data */
@@ -1423,12 +1454,10 @@ dht_migration_complete_check_task(void *data)
(iter_fd->flags & ~(O_CREAT | O_EXCL | O_TRUNC)),
iter_fd, NULL, NULL);
if (ret < 0) {
- gf_msg(this->name, GF_LOG_ERROR, -ret,
- DHT_MSG_OPEN_FD_ON_DST_FAILED,
- "failed"
- " to open the fd"
- " (%p, flags=0%o) on file %s @ %s",
- iter_fd, iter_fd->flags, path, dst_node->name);
+ gf_smsg(this->name, GF_LOG_ERROR, -ret,
+ DHT_MSG_OPEN_FD_ON_DST_FAILED, "id=%p", iter_fd,
+ "flags=0%o", iter_fd->flags, "path=%s", path, "name=%s",
+ dst_node->name, NULL);
open_failed = 1;
local->op_errno = -ret;
@@ -1437,9 +1466,11 @@ dht_migration_complete_check_task(void *data)
dht_fd_ctx_set(this, iter_fd, dst_node);
}
- fd_unref(iter_fd);
-
+ next:
LOCK(&inode->lock);
+ skip_open = _gf_false;
+ tmp = iter_fd;
+ iter_fd = list_entry(tmp->inode_list.next, typeof(*tmp), inode_list);
}
SYNCTASK_SETID(frame->root->uid, frame->root->gid);
@@ -1452,6 +1483,10 @@ dht_migration_complete_check_task(void *data)
unlock:
UNLOCK(&inode->lock);
+ if (tmp) {
+ fd_unref(tmp);
+ tmp = NULL;
+ }
out:
if (dict) {
@@ -1533,6 +1568,7 @@ dht_rebalance_inprogress_task(void *data)
int open_failed = 0;
uint64_t tmp_miginfo = 0;
dht_migrate_info_t *miginfo = NULL;
+ gf_boolean_t skip_open = _gf_false;
this = THIS;
frame = data;
@@ -1575,9 +1611,9 @@ dht_rebalance_inprogress_task(void *data)
* migrated by two different layers. Raise
* a warning here.
*/
- gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_HAS_MIGINFO,
- "%s: Found miginfo in the inode ctx",
- tmp_loc.path ? tmp_loc.path : uuid_utoa(tmp_loc.gfid));
+ gf_smsg(
+ this->name, GF_LOG_WARNING, 0, DHT_MSG_HAS_MIGINFO, "tmp=%s",
+ tmp_loc.path ? tmp_loc.path : uuid_utoa(tmp_loc.gfid), NULL);
miginfo = (void *)(uintptr_t)tmp_miginfo;
GF_REF_PUT(miginfo);
}
@@ -1586,17 +1622,16 @@ dht_rebalance_inprogress_task(void *data)
}
if (ret < 0) {
- gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_GET_XATTR_FAILED,
- "%s: failed to get the 'linkto' xattr", local->loc.path);
+ gf_smsg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_GET_XATTR_FAILED,
+ "path=%s", local->loc.path, NULL);
ret = -1;
goto out;
}
dst_node = dht_linkfile_subvol(this, NULL, NULL, dict);
if (!dst_node) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_SUBVOL_NOT_FOUND,
- "%s: failed to get the 'linkto' xattr from dict",
- local->loc.path);
+ gf_smsg(this->name, GF_LOG_ERROR, 0, DHT_MSG_GET_XATTR_FAILED,
+ "path=%s", local->loc.path, NULL);
ret = -1;
goto out;
}
@@ -1613,20 +1648,17 @@ dht_rebalance_inprogress_task(void *data)
/* lookup on dst */
ret = syncop_lookup(dst_node, &tmp_loc, &stbuf, NULL, NULL, NULL);
if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, -ret,
- DHT_MSG_FILE_LOOKUP_ON_DST_FAILED,
- "%s: failed to lookup the file on %s",
- tmp_loc.path ? tmp_loc.path : uuid_utoa(tmp_loc.gfid),
- dst_node->name);
+ gf_smsg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_FILE_LOOKUP_FAILED,
+ "tmp=%s", tmp_loc.path ? tmp_loc.path : uuid_utoa(tmp_loc.gfid),
+ "name=%s", dst_node->name, NULL);
ret = -1;
goto out;
}
if (gf_uuid_compare(stbuf.ia_gfid, tmp_loc.inode->gfid)) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_GFID_MISMATCH,
- "%s: gfid different on the target file on %s",
- tmp_loc.path ? tmp_loc.path : uuid_utoa(tmp_loc.gfid),
- dst_node->name);
+ gf_smsg(this->name, GF_LOG_ERROR, 0, DHT_MSG_GFID_MISMATCH, "tmp=%s",
+ tmp_loc.path ? tmp_loc.path : uuid_utoa(tmp_loc.gfid),
+ "name=%s", dst_node->name, NULL);
ret = -1;
goto out;
}
@@ -1653,24 +1685,40 @@ dht_rebalance_inprogress_task(void *data)
* the loop will cause the destruction of the fd. So we need to
* iterate the list safely because iter_fd cannot be trusted.
*/
- list_for_each_entry_safe(iter_fd, tmp, &inode->fd_list, inode_list)
- {
- if (fd_is_anonymous(iter_fd))
- continue;
-
- if (dht_fd_open_on_dst(this, iter_fd, dst_node))
- continue;
-
+ iter_fd = list_entry((&inode->fd_list)->next, typeof(*iter_fd), inode_list);
+ while (&iter_fd->inode_list != (&inode->fd_list)) {
/* We need to release the inode->lock before calling
* syncop_open() to avoid possible deadlocks. However this
* can cause the iter_fd to be released by other threads.
* To avoid this, we take a reference before releasing the
* lock.
*/
- __fd_ref(iter_fd);
+ if (fd_is_anonymous(iter_fd) ||
+ (dht_fd_open_on_dst(this, iter_fd, dst_node))) {
+ if (!tmp) {
+ iter_fd = list_entry(iter_fd->inode_list.next, typeof(*iter_fd),
+ inode_list);
+ continue;
+ }
+ skip_open = _gf_true;
+ }
+
+ /* Yes, this is ugly, but there isn't a cleaner way to do this.
+ * The fd_ref is an atomic increment, so it is not too bad. We want to
+ * reduce the number of inode locks and unlocks.
+ */
+
+ fd_ref(iter_fd);
UNLOCK(&inode->lock);
+ if (tmp) {
+ fd_unref(tmp);
+ tmp = NULL;
+ }
+ if (skip_open)
+ goto next;
+
/* flags for open are stripped down to allow following the
* new location of the file, otherwise we can get EEXIST or
* truncate the file again as rebalance is moving the data */
@@ -1678,11 +1726,10 @@ dht_rebalance_inprogress_task(void *data)
(iter_fd->flags & ~(O_CREAT | O_EXCL | O_TRUNC)),
iter_fd, NULL, NULL);
if (ret < 0) {
- gf_msg(this->name, GF_LOG_ERROR, -ret,
- DHT_MSG_OPEN_FD_ON_DST_FAILED,
- "failed to send open "
- "the fd (%p, flags=0%o) on file %s @ %s",
- iter_fd, iter_fd->flags, path, dst_node->name);
+ gf_smsg(this->name, GF_LOG_ERROR, -ret,
+ DHT_MSG_OPEN_FD_ON_DST_FAILED, "fd=%p", iter_fd,
+ "flags=0%o", iter_fd->flags, "path=%s", path, "name=%s",
+ dst_node->name, NULL);
ret = -1;
open_failed = 1;
} else {
@@ -1691,9 +1738,11 @@ dht_rebalance_inprogress_task(void *data)
dht_fd_ctx_set(this, iter_fd, dst_node);
}
- fd_unref(iter_fd);
-
+ next:
LOCK(&inode->lock);
+ skip_open = _gf_false;
+ tmp = iter_fd;
+ iter_fd = list_entry(tmp->inode_list.next, typeof(*tmp), inode_list);
}
SYNCTASK_SETID(frame->root->uid, frame->root->gid);
@@ -1701,6 +1750,10 @@ dht_rebalance_inprogress_task(void *data)
unlock:
UNLOCK(&inode->lock);
+ if (tmp) {
+ fd_unref(tmp);
+ tmp = NULL;
+ }
if (open_failed) {
ret = -1;
goto out;
@@ -1708,9 +1761,8 @@ unlock:
ret = dht_inode_ctx_set_mig_info(this, inode, src_node, dst_node);
if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_SET_INODE_CTX_FAILED,
- "%s: failed to set inode-ctx target file at %s", local->loc.path,
- dst_node->name);
+ gf_smsg(this->name, GF_LOG_ERROR, 0, DHT_MSG_SET_INODE_CTX_FAILED,
+ "path=%s", local->loc.path, "name=%s", dst_node->name, NULL);
goto out;
}
@@ -1803,12 +1855,16 @@ dht_inode_ctx_time_update(inode_t *inode, xlator_t *this, struct iatt *stat,
time = &ctx->time;
- DHT_UPDATE_TIME(time->mtime, time->mtime_nsec, stat->ia_mtime,
- stat->ia_mtime_nsec, inode, post);
- DHT_UPDATE_TIME(time->ctime, time->ctime_nsec, stat->ia_ctime,
- stat->ia_ctime_nsec, inode, post);
- DHT_UPDATE_TIME(time->atime, time->atime_nsec, stat->ia_atime,
- stat->ia_atime_nsec, inode, post);
+ LOCK(&inode->lock);
+ {
+ DHT_UPDATE_TIME(time->mtime, time->mtime_nsec, stat->ia_mtime,
+ stat->ia_mtime_nsec, post);
+ DHT_UPDATE_TIME(time->ctime, time->ctime_nsec, stat->ia_ctime,
+ stat->ia_ctime_nsec, post);
+ DHT_UPDATE_TIME(time->atime, time->atime_nsec, stat->ia_atime,
+ stat->ia_atime_nsec, post);
+ }
+ UNLOCK(&inode->lock);
ret = dht_inode_ctx_set(inode, this, ctx);
out:
@@ -1877,9 +1933,7 @@ dht_heal_path(xlator_t *this, char *path, inode_table_t *itable)
};
char *bname = NULL;
char *save_ptr = NULL;
- uuid_t gfid = {
- 0,
- };
+ static uuid_t gfid = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1};
char *tmp_path = NULL;
tmp_path = gf_strdup(path);
@@ -1887,9 +1941,6 @@ dht_heal_path(xlator_t *this, char *path, inode_table_t *itable)
goto out;
}
- memset(gfid, 0, 16);
- gfid[15] = 1;
-
gf_uuid_copy(loc.pargfid, gfid);
loc.parent = inode_ref(itable->root);
@@ -1918,10 +1969,10 @@ dht_heal_path(xlator_t *this, char *path, inode_table_t *itable)
*/
linked_inode = loc.inode;
bname = strtok_r(NULL, "/", &save_ptr);
- inode_unref(loc.parent);
if (!bname) {
goto out;
}
+ inode_unref(loc.parent);
loc.parent = loc.inode;
gf_uuid_copy(loc.pargfid, loc.inode->gfid);
loc.inode = NULL;
@@ -1933,10 +1984,9 @@ dht_heal_path(xlator_t *this, char *path, inode_table_t *itable)
ret = syncop_lookup(this, &loc, &iatt, NULL, NULL, NULL);
if (ret) {
- gf_msg(this->name, GF_LOG_INFO, -ret, DHT_MSG_DIR_SELFHEAL_FAILED,
- "Healing of path %s failed on subvolume %s for "
- "directory %s",
- path, this->name, bname);
+ gf_smsg(this->name, GF_LOG_INFO, -ret, DHT_MSG_DIR_SELFHEAL_FAILED,
+ "path=%s", path, "subvolume=%s", this->name, "bname=%s",
+ bname, NULL);
goto out;
}
@@ -1994,10 +2044,8 @@ dht_heal_full_path(void *data)
ret = syncop_getxattr(source, &loc, &dict, GET_ANCESTRY_PATH_KEY, NULL,
NULL);
if (ret) {
- gf_msg(this->name, GF_LOG_INFO, -ret, DHT_MSG_DIR_SELFHEAL_FAILED,
- "Failed to get path from subvol %s. Aborting "
- "directory healing.",
- source->name);
+ gf_smsg(this->name, GF_LOG_INFO, -ret, DHT_MSG_DIR_HEAL_ABORT,
+ "subvol=%s", source->name, NULL);
goto out;
}
@@ -2035,6 +2083,7 @@ dht_heal_full_path_done(int op_ret, call_frame_t *heal_frame, void *data)
dht_local_t *local = NULL;
xlator_t *this = NULL;
int ret = -1;
+ int op_errno = 0;
local = heal_frame->local;
main_frame = local->main_frame;
@@ -2044,10 +2093,12 @@ dht_heal_full_path_done(int op_ret, call_frame_t *heal_frame, void *data)
dht_set_fixed_dir_stat(&local->postparent);
if (local->need_xattr_heal) {
local->need_xattr_heal = 0;
- ret = dht_dir_xattr_heal(this, local);
- if (ret)
- gf_msg(this->name, GF_LOG_ERROR, ret, DHT_MSG_DIR_XATTR_HEAL_FAILED,
- "xattr heal failed for directory %s ", local->loc.path);
+ ret = dht_dir_xattr_heal(this, local, &op_errno);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, op_errno,
+ DHT_MSG_DIR_XATTR_HEAL_FAILED, "path=%s", local->loc.path,
+ NULL);
+ }
}
DHT_STACK_UNWIND(lookup, main_frame, 0, 0, local->inode, &local->stbuf,
@@ -2134,16 +2185,15 @@ dht_get_lock_subvolume(xlator_t *this, struct gf_flock *lock,
ret = __dht_lock_subvol_set(inode, this, cached_subvol);
if (ret) {
gf_uuid_unparse(inode->gfid, gfid);
- gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_SET_INODE_CTX_FAILED,
- "Failed to set lock_subvol in "
- "inode ctx for gfid %s",
- gfid);
- goto unlock;
+ UNLOCK(&inode->lock);
+ gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_SET_INODE_CTX_FAILED,
+ "lock_subvol gfid=%s", gfid, NULL);
+ goto post_unlock;
}
subvol = cached_subvol;
}
-unlock:
UNLOCK(&inode->lock);
+post_unlock:
if (!subvol && inode && lock->l_type != F_UNLCK) {
inode_unref(inode);
}
@@ -2167,8 +2217,8 @@ dht_lk_inode_unref(call_frame_t *frame, int32_t op_ret)
inode = local->loc.inode ? local->loc.inode : local->fd->inode;
}
if (!inode) {
- gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_LOCK_INODE_UNREF_FAILED,
- "Found a NULL inode. Failed to unref the inode");
+ gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_LOCK_INODE_UNREF_FAILED,
+ NULL);
goto out;
}
@@ -2194,11 +2244,8 @@ dht_lk_inode_unref(call_frame_t *frame, int32_t op_ret)
inode_unref(inode);
} else {
gf_uuid_unparse(inode->gfid, gfid);
- gf_msg(this->name, GF_LOG_WARNING, 0,
- DHT_MSG_LOCK_INODE_UNREF_FAILED,
- "Unlock request failed for gfid %s."
- "Failed to unref the inode",
- gfid);
+ gf_smsg(this->name, GF_LOG_WARNING, 0,
+ DHT_MSG_LOCK_INODE_UNREF_FAILED, "gfid=%s", gfid, NULL);
goto out;
}
default:
@@ -2220,12 +2267,11 @@ dht_dir_set_heal_xattr(xlator_t *this, dht_local_t *local, dict_t *dst,
int luret = -1;
int luflag = -1;
int i = 0;
+ char **xattrs_to_heal;
if (!src || !dst) {
- gf_msg(this->name, GF_LOG_WARNING, EINVAL, DHT_MSG_DICT_SET_FAILED,
- "src or dst is NULL. Failed to set "
- " dictionary value for path %s",
- local->loc.path);
+ gf_smsg(this->name, GF_LOG_WARNING, EINVAL, DHT_MSG_DST_NULL_SET_FAILED,
+ "path=%s", local->loc.path, NULL);
return;
}
/* Check if any user xattr present in src dict and set
@@ -2236,17 +2282,18 @@ dht_dir_set_heal_xattr(xlator_t *this, dht_local_t *local, dict_t *dst,
and set it to dst dict, here index start from 1 because
user xattr already checked in previous statement
*/
+
+ xattrs_to_heal = get_xattrs_to_heal();
+
for (i = 1; xattrs_to_heal[i]; i++) {
keyval = dict_get(src, xattrs_to_heal[i]);
if (keyval) {
luflag = 1;
ret = dict_set(dst, xattrs_to_heal[i], keyval);
if (ret)
- gf_msg(this->name, GF_LOG_WARNING, ENOMEM,
- DHT_MSG_DICT_SET_FAILED,
- "Failed to set dictionary value:key = %s for "
- "path %s",
- xattrs_to_heal[i], local->loc.path);
+ gf_smsg(this->name, GF_LOG_WARNING, ENOMEM,
+ DHT_MSG_DICT_SET_FAILED, "key=%s", xattrs_to_heal[i],
+ "path=%s", local->loc.path, NULL);
keyval = NULL;
}
}
diff --git a/xlators/cluster/dht/src/dht-inode-read.c b/xlators/cluster/dht/src/dht-inode-read.c
index f46370a9208..dbb8070b0da 100644
--- a/xlators/cluster/dht/src/dht-inode-read.c
+++ b/xlators/cluster/dht/src/dht-inode-read.c
@@ -10,25 +10,25 @@
#include "dht-common.h"
-int
+static int
dht_access2(xlator_t *this, xlator_t *dst_node, call_frame_t *frame, int ret);
-int
+static int
dht_readv2(xlator_t *this, xlator_t *dst_node, call_frame_t *frame, int ret);
-int
+static int
dht_attr2(xlator_t *this, xlator_t *dst_node, call_frame_t *frame, int ret);
-int
+static int
dht_open2(xlator_t *this, xlator_t *dst_node, call_frame_t *frame, int ret);
-int
+static int
dht_flush2(xlator_t *this, xlator_t *dst_node, call_frame_t *frame, int ret);
-int
+static int
dht_lk2(xlator_t *this, xlator_t *dst_node, call_frame_t *frame, int ret);
-int
+static int
dht_fsync2(xlator_t *this, xlator_t *dst_node, call_frame_t *frame, int ret);
-int
+static int
dht_common_xattrop2(xlator_t *this, xlator_t *subvol, call_frame_t *frame,
int ret);
-int
+static int
dht_open_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
int op_errno, fd_t *fd, dict_t *xdata)
{
@@ -67,7 +67,7 @@ out:
return 0;
}
-int
+static int
dht_open2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret)
{
dht_local_t *local = NULL;
@@ -162,8 +162,8 @@ dht_file_attr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
local = frame->local;
prev = cookie;
- if ((local->fop == GF_FOP_FSTAT) && (op_ret == -1) && (op_errno == EBADF) &&
- !(local->fd_checked)) {
+ if ((local->fop == GF_FOP_FSTAT) &&
+ dht_check_remote_fd_failed_error(local, op_ret, op_errno)) {
ret = dht_check_and_open_fd_on_subvol(this, frame);
if (ret)
goto out;
@@ -216,7 +216,7 @@ err:
return 0;
}
-int
+static int
dht_attr2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret)
{
dht_local_t *local = NULL;
@@ -258,7 +258,7 @@ out:
return 0;
}
-int
+static int
dht_attr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
int op_errno, struct iatt *stbuf, dict_t *xdata)
{
@@ -272,19 +272,19 @@ dht_attr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
{
if (op_ret == -1) {
local->op_errno = op_errno;
+ UNLOCK(&frame->lock);
gf_msg_debug(this->name, op_errno, "subvolume %s returned -1",
prev->name);
- goto unlock;
+ goto post_unlock;
}
dht_iatt_merge(this, &local->stbuf, stbuf);
local->op_ret = 0;
}
-unlock:
UNLOCK(&frame->lock);
-
+post_unlock:
this_call_cnt = dht_frame_return(frame);
if (is_last_call(this_call_cnt)) {
DHT_STACK_UNWIND(stat, frame, local->op_ret, local->op_errno,
@@ -431,7 +431,7 @@ dht_readv_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
if (local->call_cnt != 1)
goto out;
- if (op_ret == -1 && (op_errno == EBADF) && !(local->fd_checked)) {
+ if (dht_check_remote_fd_failed_error(local, op_ret, op_errno)) {
ret = dht_check_and_open_fd_on_subvol(this, frame);
if (ret)
goto out;
@@ -473,7 +473,7 @@ out:
return 0;
}
-int
+static int
dht_readv2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret)
{
dht_local_t *local = NULL;
@@ -558,7 +558,7 @@ err:
return 0;
}
-int
+static int
dht_access_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
int op_errno, dict_t *xdata)
{
@@ -606,7 +606,7 @@ out:
return 0;
}
-int
+static int
dht_access2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret)
{
dht_local_t *local = NULL;
@@ -703,7 +703,7 @@ dht_flush_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
if (local->call_cnt != 1)
goto out;
- if (op_ret == -1 && (op_errno == EBADF) && !(local->fd_checked)) {
+ if (dht_check_remote_fd_failed_error(local, op_ret, op_errno)) {
ret = dht_check_and_open_fd_on_subvol(this, frame);
if (ret)
goto out;
@@ -735,7 +735,7 @@ out:
return 0;
}
-int
+static int
dht_flush2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret)
{
dht_local_t *local = NULL;
@@ -820,7 +820,7 @@ dht_fsync_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
local->op_errno = op_errno;
- if (op_ret == -1 && (op_errno == EBADF) && !(local->fd_checked)) {
+ if (dht_check_remote_fd_failed_error(local, op_ret, op_errno)) {
ret = dht_check_and_open_fd_on_subvol(this, frame);
if (ret)
goto out;
@@ -881,7 +881,7 @@ out:
return 0;
}
-int
+static int
dht_fsync2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret)
{
dht_local_t *local = NULL;
@@ -959,7 +959,7 @@ err:
/* TODO: for 'lk()' call, we need some other special error, may be ESTALE to
indicate that lock migration happened on the fd, so we can consider it as
phase 2 of migration */
-int
+static int
dht_lk_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
int op_errno, struct gf_flock *flock, dict_t *xdata)
{
@@ -1006,7 +1006,7 @@ out:
return 0;
}
-int
+static int
dht_lk2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret)
{
dht_local_t *local = NULL;
@@ -1087,7 +1087,7 @@ err:
return 0;
}
-int
+static int
dht_lease_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
int op_errno, struct gf_lease *lease, dict_t *xdata)
{
@@ -1129,7 +1129,7 @@ err:
}
/* Symlinks are currently not migrated, so no need for any check here */
-int
+static int
dht_readlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
int op_errno, const char *path, struct iatt *stbuf,
dict_t *xdata)
@@ -1192,6 +1192,29 @@ err:
return 0;
}
+/* Read the file mode/type that was requested through xdata.
+ *
+ * DHT_MODE_IN_XDATA_KEY is preferred when present; otherwise fall back to
+ * DHT_IATT_IN_XDATA_KEY.
+ *
+ * On success the caller's iatt is a dummy with only ia_prot and ia_type
+ * filled in; no other field is touched.
+ *
+ * @xdata: response dictionary to read from
+ * @stbuf: caller-owned iatt to fill in
+ *
+ * Returns 0 on success, non-zero if neither key could be read.
+ */
+static int
+dht_read_iatt_from_xdata(dict_t *xdata, struct iatt *stbuf)
+{
+    int ret = -1;
+    int32_t mode = 0;
+    struct iatt *xdata_iatt = NULL;
+
+    ret = dict_get_int32(xdata, DHT_MODE_IN_XDATA_KEY, &mode);
+    if (!ret) {
+        stbuf->ia_prot = ia_prot_from_st_mode(mode);
+        stbuf->ia_type = ia_type_from_st_mode(mode);
+        return 0;
+    }
+
+    /* Fetch into a separate pointer: passing &stbuf here would only
+     * re-point the local parameter and leave the caller's struct
+     * unmodified while still reporting success.
+     */
+    ret = dict_get_bin(xdata, DHT_IATT_IN_XDATA_KEY, (void **)&xdata_iatt);
+    if (ret) {
+        return ret;
+    }
+
+    if (!xdata_iatt) {
+        return -1;
+    }
+
+    stbuf->ia_prot = xdata_iatt->ia_prot;
+    stbuf->ia_type = xdata_iatt->ia_type;
+
+    return 0;
+}
+
int
dht_common_xattrop_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, dict_t *dict,
@@ -1223,7 +1246,14 @@ dht_common_xattrop_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
if (local->call_cnt != 1)
goto out;
- ret = dht_read_iatt_from_xdata(this, xdata, &stbuf);
+ if (dht_check_remote_fd_failed_error(local, op_ret, op_errno)) {
+ ret = dht_check_and_open_fd_on_subvol(this, frame);
+ if (ret)
+ goto out;
+ return 0;
+ }
+
+ ret = dht_read_iatt_from_xdata(xdata, &stbuf);
if ((!op_ret) && (ret)) {
/* This is a potential problem and can cause corruption
@@ -1275,7 +1305,7 @@ out:
return 0;
}
-int
+static int
dht_common_xattrop2(xlator_t *this, xlator_t *subvol, call_frame_t *frame,
int ret)
{
@@ -1334,7 +1364,7 @@ out:
return 0;
}
-int
+static int
dht_xattrop_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata)
{
@@ -1342,6 +1372,22 @@ dht_xattrop_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
return 0;
}
+/* Request the file mode/type in the response xdata.
+ *
+ * Both DHT_MODE_IN_XDATA_KEY and DHT_IATT_IN_XDATA_KEY are set in
+ * xattr_req. The reader prefers DHT_MODE_IN_XDATA_KEY and falls back to
+ * DHT_IATT_IN_XDATA_KEY, so the request is usable as long as at least one
+ * of the two keys was stored.
+ *
+ * @xattr_req: request dictionary to add the keys to
+ *
+ * Returns 0 if at least one key was set, otherwise the error from the
+ * DHT_IATT_IN_XDATA_KEY attempt.
+ */
+static int
+dht_request_iatt_in_xdata(dict_t *xattr_req)
+{
+    int mode_ret = -1;
+    int iatt_ret = -1;
+
+    /* Keep the two results apart; reusing one variable would let a
+     * failure of the second set hide a success of the first.
+     */
+    mode_ret = dict_set_int8(xattr_req, DHT_MODE_IN_XDATA_KEY, 1);
+    iatt_ret = dict_set_int8(xattr_req, DHT_IATT_IN_XDATA_KEY, 1);
+
+    /* At least one call succeeded */
+    if (!mode_ret || !iatt_ret) {
+        return 0;
+    }
+
+    return iatt_ret;
+}
+
int
dht_xattrop(call_frame_t *frame, xlator_t *this, loc_t *loc,
gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata)
@@ -1384,7 +1430,7 @@ dht_xattrop(call_frame_t *frame, xlator_t *this, loc_t *loc,
local->rebalance.xattr = dict_ref(dict);
local->rebalance.flags = flags;
- ret = dht_request_iatt_in_xdata(this, local->xattr_req);
+ ret = dht_request_iatt_in_xdata(local->xattr_req);
if (ret) {
gf_msg_debug(this->name, 0,
@@ -1406,7 +1452,7 @@ err:
return 0;
}
-int
+static int
dht_fxattrop_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata)
{
@@ -1454,7 +1500,7 @@ dht_fxattrop(call_frame_t *frame, xlator_t *this, fd_t *fd,
local->rebalance.xattr = dict_ref(dict);
local->rebalance.flags = flags;
- ret = dht_request_iatt_in_xdata(this, local->xattr_req);
+ ret = dht_request_iatt_in_xdata(local->xattr_req);
if (ret) {
gf_msg_debug(this->name, 0, "Failed to set dictionary key %s fd=%p",
@@ -1479,7 +1525,7 @@ err:
* below fops, hence not implementing 'migration' related checks
*/
-int
+static int
dht_inodelk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, dict_t *xdata)
@@ -1535,8 +1581,26 @@ dht_finodelk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
+ dht_local_t *local = NULL;
+ int ret = 0;
+
+ GF_VALIDATE_OR_GOTO("dht", frame, out);
+ GF_VALIDATE_OR_GOTO("dht", this, out);
+ GF_VALIDATE_OR_GOTO("dht", frame->local, out);
+
+ local = frame->local;
+
+ if (dht_check_remote_fd_failed_error(local, op_ret, op_errno)) {
+ ret = dht_check_and_open_fd_on_subvol(this, frame);
+ if (ret)
+ goto out;
+ return 0;
+ }
+
+out:
dht_lk_inode_unref(frame, op_ret);
DHT_STACK_UNWIND(finodelk, frame, op_ret, op_errno, xdata);
+
return 0;
}
@@ -1574,6 +1638,13 @@ dht_finodelk(call_frame_t *frame, xlator_t *this, const char *volume, fd_t *fd,
if (ret)
goto err;
*/
+ local->rebalance.flock = *lock;
+ local->rebalance.lock_cmd = cmd;
+ local->key = gf_strdup(volume);
+
+ if (xdata)
+ local->xattr_req = dict_ref(xdata);
+
STACK_WIND(frame, dht_finodelk_cbk, lock_subvol,
lock_subvol->fops->finodelk, volume, fd, cmd, lock, xdata);
diff --git a/xlators/cluster/dht/src/dht-inode-write.c b/xlators/cluster/dht/src/dht-inode-write.c
index d0d12fd7658..2f23ce90fbd 100644
--- a/xlators/cluster/dht/src/dht-inode-write.c
+++ b/xlators/cluster/dht/src/dht-inode-write.c
@@ -10,17 +10,17 @@
#include "dht-common.h"
-int
+static int
dht_writev2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret);
-int
+static int
dht_truncate2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret);
-int
+static int
dht_setattr2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret);
-int
+static int
dht_fallocate2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret);
-int
+static int
dht_discard2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret);
-int
+static int
dht_zerofill2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret);
int
@@ -49,7 +49,7 @@ dht_writev_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
* We only check once as this could be a valid bad fd error.
*/
- if (op_ret == -1 && (op_errno == EBADF) && !(local->fd_checked)) {
+ if (dht_check_remote_fd_failed_error(local, op_ret, op_errno)) {
ret = dht_check_and_open_fd_on_subvol(this, frame);
if (ret)
goto out;
@@ -93,30 +93,28 @@ dht_writev_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
/* Check if the rebalance phase1 is true */
if (IS_DHT_MIGRATION_PHASE1(postbuf)) {
- if (!dht_is_tier_xlator(this)) {
+ if (!local->xattr_req) {
+ local->xattr_req = dict_new();
if (!local->xattr_req) {
- local->xattr_req = dict_new();
- if (!local->xattr_req) {
- gf_msg(this->name, GF_LOG_ERROR, DHT_MSG_NO_MEMORY, ENOMEM,
- "insufficient memory");
- local->op_errno = ENOMEM;
- local->op_ret = -1;
- goto out;
- }
- }
-
- ret = dict_set_uint32(local->xattr_req,
- GF_PROTECT_FROM_EXTERNAL_WRITES, 1);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, DHT_MSG_DICT_SET_FAILED, 0,
- "Failed to set key %s in dictionary",
- GF_PROTECT_FROM_EXTERNAL_WRITES);
+ gf_msg(this->name, GF_LOG_ERROR, DHT_MSG_NO_MEMORY, ENOMEM,
+ "insufficient memory");
local->op_errno = ENOMEM;
local->op_ret = -1;
goto out;
}
}
+ ret = dict_set_uint32(local->xattr_req, GF_PROTECT_FROM_EXTERNAL_WRITES,
+ 1);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, DHT_MSG_DICT_SET_FAILED, 0,
+ "Failed to set key %s in dictionary",
+ GF_PROTECT_FROM_EXTERNAL_WRITES);
+ local->op_errno = ENOMEM;
+ local->op_ret = -1;
+ goto out;
+ }
+
dht_iatt_merge(this, &local->stbuf, postbuf);
dht_iatt_merge(this, &local->prebuf, prebuf);
@@ -142,7 +140,7 @@ out:
return 0;
}
-int
+static int
dht_writev2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret)
{
dht_local_t *local = NULL;
@@ -262,8 +260,8 @@ dht_truncate_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
* We only check once as this could actually be a valid error.
*/
- if ((local->fop == GF_FOP_FTRUNCATE) && (op_ret == -1) &&
- ((op_errno == EBADF) || (op_errno == EINVAL)) && !(local->fd_checked)) {
+ if ((local->fop == GF_FOP_FTRUNCATE) &&
+ dht_check_remote_fd_failed_error(local, op_ret, op_errno)) {
ret = dht_check_and_open_fd_on_subvol(this, frame);
if (ret)
goto out;
@@ -336,7 +334,7 @@ err:
return 0;
}
-int
+static int
dht_truncate2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret)
{
dht_local_t *local = NULL;
@@ -489,7 +487,7 @@ dht_fallocate_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
* We only check once as this could actually be a valid error.
*/
- if ((op_ret == -1) && (op_errno == EBADF) && !(local->fd_checked)) {
+ if (dht_check_remote_fd_failed_error(local, op_ret, op_errno)) {
ret = dht_check_and_open_fd_on_subvol(this, frame);
if (ret)
goto out;
@@ -555,7 +553,7 @@ err:
return 0;
}
-int
+static int
dht_fallocate2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret)
{
dht_local_t *local = NULL;
@@ -666,7 +664,7 @@ dht_discard_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
* and a lookup updated the cached subvol in the inode ctx.
* We only check once as this could actually be a valid error.
*/
- if ((op_ret == -1) && (op_errno == EBADF) && !(local->fd_checked)) {
+ if (dht_check_remote_fd_failed_error(local, op_ret, op_errno)) {
ret = dht_check_and_open_fd_on_subvol(this, frame);
if (ret)
goto out;
@@ -731,7 +729,7 @@ err:
return 0;
}
-int
+static int
dht_discard2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret)
{
dht_local_t *local = NULL;
@@ -838,7 +836,7 @@ dht_zerofill_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
* and a lookup updated the cached subvol in the inode ctx.
* We only check once as this could actually be a valid error.
*/
- if ((op_ret == -1) && (op_errno == EBADF) && !(local->fd_checked)) {
+ if (dht_check_remote_fd_failed_error(local, op_ret, op_errno)) {
ret = dht_check_and_open_fd_on_subvol(this, frame);
if (ret)
goto out;
@@ -902,7 +900,7 @@ err:
return 0;
}
-int
+static int
dht_zerofill2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret)
{
dht_local_t *local = NULL;
@@ -1005,8 +1003,8 @@ dht_file_setattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
local->op_errno = op_errno;
- if ((local->fop == GF_FOP_FSETATTR) && (op_ret == -1) &&
- (op_errno == EBADF) && !(local->fd_checked)) {
+ if ((local->fop == GF_FOP_FSETATTR) &&
+ dht_check_remote_fd_failed_error(local, op_ret, op_errno)) {
ret = dht_check_and_open_fd_on_subvol(this, frame);
if (ret)
goto out;
@@ -1049,7 +1047,7 @@ out:
return 0;
}
-int
+static int
dht_setattr2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret)
{
dht_local_t *local = NULL;
@@ -1113,9 +1111,10 @@ dht_setattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
{
if (op_ret == -1) {
local->op_errno = op_errno;
+ UNLOCK(&frame->lock);
gf_msg_debug(this->name, op_errno, "subvolume %s returned -1",
prev->name);
- goto unlock;
+ goto post_unlock;
}
dht_iatt_merge(this, &local->prebuf, statpre);
@@ -1124,9 +1123,8 @@ dht_setattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
local->op_ret = 0;
local->op_errno = 0;
}
-unlock:
UNLOCK(&frame->lock);
-
+post_unlock:
this_call_cnt = dht_frame_return(frame);
if (is_last_call(this_call_cnt)) {
if (local->op_ret == 0)
@@ -1151,24 +1149,22 @@ dht_non_mds_setattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
local = frame->local;
prev = cookie;
+ if (op_ret == -1) {
+ gf_msg(this->name, op_errno, 0, 0, "subvolume %s returned -1",
+ prev->name);
+ goto post_unlock;
+ }
+
LOCK(&frame->lock);
{
- if (op_ret == -1) {
- gf_msg(this->name, op_errno, 0, 0, "subvolume %s returned -1",
- prev->name);
-
- goto unlock;
- }
-
dht_iatt_merge(this, &local->prebuf, statpre);
dht_iatt_merge(this, &local->stbuf, statpost);
local->op_ret = 0;
local->op_errno = 0;
}
-unlock:
UNLOCK(&frame->lock);
-
+post_unlock:
this_call_cnt = dht_frame_return(frame);
if (is_last_call(this_call_cnt)) {
dht_inode_ctx_time_set(local->loc.inode, this, &local->stbuf);
diff --git a/xlators/cluster/dht/src/dht-layout.c b/xlators/cluster/dht/src/dht-layout.c
index 544b9638104..fda904c92c9 100644
--- a/xlators/cluster/dht/src/dht-layout.c
+++ b/xlators/cluster/dht/src/dht-layout.c
@@ -8,11 +8,8 @@
cases as published by the Free Software Foundation.
*/
-#include "glusterfs.h"
-#include "xlator.h"
#include "dht-common.h"
-#include "byte-order.h"
-#include "dht-messages.h"
+#include <glusterfs/byte-order.h>
#include "unittest/unittest.h"
#define layout_base_size (sizeof(dht_layout_t))
@@ -134,9 +131,8 @@ dht_layout_search(xlator_t *this, dht_layout_t *layout, const char *name)
ret = dht_hash_compute(this, layout->type, name, &hash);
if (ret != 0) {
- gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_COMPUTE_HASH_FAILED,
- "hash computation failed for type=%d name=%s", layout->type,
- name);
+ gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_COMPUTE_HASH_FAILED,
+ "type=%d", layout->type, "name=%s", name, NULL);
goto out;
}
@@ -148,8 +144,8 @@ dht_layout_search(xlator_t *this, dht_layout_t *layout, const char *name)
}
if (!subvol) {
- gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_HASHED_SUBVOL_GET_FAILED,
- "no subvolume for hash (value) = %u", hash);
+ gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_HASHED_SUBVOL_GET_FAILED,
+ "hash-value=0x%x", hash, NULL);
}
out:
@@ -258,7 +254,7 @@ dht_disk_layout_extract_for_subvol(xlator_t *this, dht_layout_t *layout,
return dht_disk_layout_extract(this, layout, i, disk_layout_p);
}
-int
+static int
dht_disk_layout_merge(xlator_t *this, dht_layout_t *layout, int pos,
void *disk_layout_raw, int disk_layout_len)
{
@@ -269,8 +265,8 @@ dht_disk_layout_merge(xlator_t *this, dht_layout_t *layout, int pos,
int disk_layout[4];
if (!disk_layout_raw) {
- gf_msg(this->name, GF_LOG_CRITICAL, 0, DHT_MSG_LAYOUT_MERGE_FAILED,
- "error no layout on disk for merge");
+ gf_smsg(this->name, GF_LOG_CRITICAL, 0, DHT_MSG_LAYOUT_MERGE_FAILED,
+ NULL);
return -1;
}
@@ -287,10 +283,8 @@ dht_disk_layout_merge(xlator_t *this, dht_layout_t *layout, int pos,
case DHT_HASH_TYPE_DM:
break;
default:
- gf_msg(this->name, GF_LOG_CRITICAL, 0, DHT_MSG_INVALID_DISK_LAYOUT,
- "Invalid disk layout: "
- "Catastrophic error layout with unknown type found %d",
- disk_layout[1]);
+ gf_smsg(this->name, GF_LOG_CRITICAL, 0, DHT_MSG_INVALID_DISK_LAYOUT,
+ "layout=%d", disk_layout[1], NULL);
return -1;
}
@@ -302,9 +296,10 @@ dht_disk_layout_merge(xlator_t *this, dht_layout_t *layout, int pos,
layout->list[pos].start = start_off;
layout->list[pos].stop = stop_off;
- gf_msg_trace(
- this->name, 0, "merged to layout: %u - %u (type %d, hash %d) from %s",
- start_off, stop_off, commit_hash, type, layout->list[pos].xlator->name);
+ gf_msg_trace(this->name, 0,
+ "merged to layout: 0x%x - 0x%x (hash 0x%x, type %d) from %s",
+ start_off, stop_off, commit_hash, type,
+ layout->list[pos].xlator->name);
return 0;
}
@@ -357,8 +352,8 @@ dht_layout_merge(xlator_t *this, dht_layout_t *layout, xlator_t *subvol,
ret = dht_disk_layout_merge(this, layout, i, disk_layout_raw,
disk_layout_len);
if (ret != 0) {
- gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_LAYOUT_MERGE_FAILED,
- "layout merge from subvolume %s failed", subvol->name);
+ gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_LAYOUT_MERGE_FAILED,
+ "subvolume=%s", subvol->name, NULL);
goto out;
}
@@ -417,8 +412,7 @@ dht_layout_range_swap(dht_layout_t *layout, int i, int j)
layout->list[j].start = start_swap;
layout->list[j].stop = stop_swap;
}
-
-int64_t
+static int64_t
dht_layout_entry_cmp_volname(dht_layout_t *layout, int i, int j)
{
return (strcmp(layout->list[i].xlator->name, layout->list[j].xlator->name));
@@ -441,7 +435,7 @@ dht_is_subvol_in_layout(dht_layout_t *layout, xlator_t *xlator)
return _gf_false;
}
-int64_t
+static int64_t
dht_layout_entry_cmp(dht_layout_t *layout, int i, int j)
{
int64_t diff = 0;
@@ -477,7 +471,7 @@ dht_layout_sort(dht_layout_t *layout)
return 0;
}
-int
+void
dht_layout_sort_volname(dht_layout_t *layout)
{
int i = 0;
@@ -493,8 +487,6 @@ dht_layout_sort_volname(dht_layout_t *layout)
dht_layout_entry_swap(layout, i, j);
}
}
-
- return 0;
}
void
@@ -627,8 +619,8 @@ dht_layout_normalize(xlator_t *this, loc_t *loc, dht_layout_t *layout)
ret = dht_layout_sort(layout);
if (ret == -1) {
- gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_LAYOUT_SORT_FAILED,
- "sort failed?! how the ....");
+ gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_LAYOUT_SORT_FAILED,
+ NULL);
goto out;
}
@@ -644,10 +636,9 @@ dht_layout_normalize(xlator_t *this, loc_t *loc, dht_layout_t *layout)
" gfid = %s",
loc->path, gfid);
} else {
- gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_ANOMALIES_INFO,
- "Found anomalies in %s (gfid = %s). "
- "Holes=%d overlaps=%d",
- loc->path, gfid, holes, overlaps);
+ gf_smsg(this->name, GF_LOG_INFO, 0, DHT_MSG_ANOMALIES_INFO,
+ "path=%s", loc->path, "gfid=%s", gfid, "holes=%d", holes,
+ "overlaps=%d", overlaps, NULL);
}
ret = -1;
}
@@ -714,12 +705,11 @@ dht_layout_dir_mismatch(xlator_t *this, dht_layout_t *layout, xlator_t *subvol,
if (!xattr) {
if (err == 0) {
if (loc) {
- gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_DICT_GET_FAILED,
- "%s: xattr dictionary is NULL", loc->path);
+ gf_smsg(this->name, GF_LOG_INFO, 0, DHT_MSG_XATTR_DICT_NULL,
+ "path=%s", loc->path, NULL);
} else {
- gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_DICT_GET_FAILED,
- "path not found: "
- "xattr dictionary is NULL");
+ gf_smsg(this->name, GF_LOG_INFO, 0, DHT_MSG_XATTR_DICT_NULL,
+ "path not found", NULL);
}
ret = -1;
}
@@ -731,13 +721,13 @@ dht_layout_dir_mismatch(xlator_t *this, dht_layout_t *layout, xlator_t *subvol,
if (dict_ret < 0) {
if (err == 0 && layout->list[pos].stop) {
if (loc) {
- gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_DISK_LAYOUT_MISSING,
- "%s: Disk layout missing, gfid = %s", loc->path, gfid);
+ gf_smsg(this->name, GF_LOG_INFO, 0, DHT_MSG_DISK_LAYOUT_MISSING,
+ "path=%s", loc->path, "gfid=%s", gfid, NULL);
} else {
- gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_DISK_LAYOUT_MISSING,
- "path not found: "
- "Disk layout missing, gfid = %s",
- gfid);
+ gf_smsg(this->name, GF_LOG_INFO, 0, DHT_MSG_DISK_LAYOUT_MISSING,
+ "path not found"
+ "gfid=%s",
+ gfid, NULL);
}
ret = -1;
}
@@ -753,13 +743,13 @@ dht_layout_dir_mismatch(xlator_t *this, dht_layout_t *layout, xlator_t *subvol,
if ((layout->list[pos].start != start_off) ||
(layout->list[pos].stop != stop_off) ||
(layout->list[pos].commit_hash != commit_hash)) {
- gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LAYOUT_INFO,
- "subvol: %s; inode layout - %" PRIu32 " - %" PRIu32 " - %" PRIu32
- "; "
- "disk layout - %" PRIu32 " - %" PRIu32 " - %" PRIu32,
- layout->list[pos].xlator->name, layout->list[pos].start,
- layout->list[pos].stop, layout->list[pos].commit_hash, start_off,
- stop_off, commit_hash);
+ gf_smsg(this->name, GF_LOG_INFO, 0, DHT_MSG_LAYOUT_INFO, "subvol=%s",
+ layout->list[pos].xlator->name, "inode-layout:start=0x%x",
+ layout->list[pos].start, "inode-layout:stop=0x%x",
+ layout->list[pos].stop, "layout-commit-hash=0x%x; ",
+ layout->list[pos].commit_hash, "disk-layout:start-off=0x%x",
+ start_off, "disk-layout:top-off=0x%x", stop_off,
+ "commit-hash=0x%x", commit_hash, NULL);
ret = 1;
} else {
ret = 0;
@@ -781,9 +771,8 @@ dht_layout_preset(xlator_t *this, xlator_t *subvol, inode_t *inode)
layout = dht_layout_for_subvol(this, subvol);
if (!layout) {
- gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_SUBVOL_NO_LAYOUT_INFO,
- "no pre-set layout for subvolume %s",
- subvol ? subvol->name : "<nil>");
+ gf_smsg(this->name, GF_LOG_INFO, 0, DHT_MSG_SUBVOL_NO_LAYOUT_INFO,
+ "subvolume=%s", subvol ? subvol->name : "<nil>", NULL);
ret = -1;
goto out;
}
diff --git a/xlators/cluster/dht/src/dht-linkfile.c b/xlators/cluster/dht/src/dht-linkfile.c
index eb1695f7e05..89ec6cca56e 100644
--- a/xlators/cluster/dht/src/dht-linkfile.c
+++ b/xlators/cluster/dht/src/dht-linkfile.c
@@ -8,13 +8,10 @@
cases as published by the Free Software Foundation.
*/
-#include "glusterfs.h"
-#include "xlator.h"
-#include "compat.h"
+#include <glusterfs/compat.h>
#include "dht-common.h"
-#include "dht-messages.h"
-int
+static int
dht_linkfile_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno, inode_t *inode,
struct iatt *stbuf, dict_t *xattr,
@@ -37,17 +34,16 @@ dht_linkfile_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
is_linkfile = check_is_linkfile(inode, stbuf, xattr, conf->link_xattr_name);
if (!is_linkfile)
- gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_NOT_LINK_FILE_ERROR,
- "got non-linkfile %s:%s, gfid = %s", prev->name, local->loc.path,
- gfid);
+ gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_NOT_LINK_FILE_ERROR,
+ "name=%s", prev->name, "path=%s", local->loc.path, "gfid=%s",
+ gfid, NULL);
out:
local->linkfile.linkfile_cbk(frame, cookie, this, op_ret, op_errno, inode,
stbuf, postparent, postparent, xattr);
return 0;
}
-#define is_equal(a, b) ((a) == (b))
-int
+static int
dht_linkfile_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno, inode_t *inode,
struct iatt *stbuf, struct iatt *preparent,
@@ -76,9 +72,8 @@ dht_linkfile_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
goto out;
ret = dict_set_uint32(xattrs, conf->link_xattr_name, 256);
if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED,
- "Failed to set dictionary value. key : %s",
- conf->link_xattr_name);
+ gf_smsg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED,
+ "mame=%s", conf->link_xattr_name, NULL);
goto out;
}
@@ -128,27 +123,23 @@ dht_linkfile_create(call_frame_t *frame, fop_mknod_cbk_t linkfile_cbk,
ret = dict_set_gfuuid(dict, "gfid-req", local->gfid, true);
if (ret)
- gf_msg("dht-linkfile", GF_LOG_INFO, 0, DHT_MSG_DICT_SET_FAILED,
- "%s: Failed to set dictionary value: "
- "key = gfid-req, gfid = %s ",
- loc->path, gfid);
+ gf_smsg("dht-linkfile", GF_LOG_INFO, 0, DHT_MSG_DICT_SET_FAILED,
+ "path=%s", loc->path, "gfid=%s", gfid, NULL);
} else {
gf_uuid_unparse(loc->gfid, gfid);
}
ret = dict_set_str(dict, GLUSTERFS_INTERNAL_FOP_KEY, "yes");
if (ret)
- gf_msg("dht-linkfile", GF_LOG_INFO, 0, DHT_MSG_DICT_SET_FAILED,
- "%s: Failed to set dictionary value: key = %s,"
- " gfid = %s",
- loc->path, GLUSTERFS_INTERNAL_FOP_KEY, gfid);
+ gf_smsg("dht-linkfile", GF_LOG_INFO, 0, DHT_MSG_DICT_SET_FAILED,
+ "path=%s", loc->path, "key=%s", GLUSTERFS_INTERNAL_FOP_KEY,
+ "gfid=%s", gfid, NULL);
ret = dict_set_str(dict, conf->link_xattr_name, tovol->name);
if (ret < 0) {
- gf_msg(frame->this->name, GF_LOG_INFO, 0, DHT_MSG_CREATE_LINK_FAILED,
- "%s: failed to initialize linkfile data, gfid = %s", loc->path,
- gfid);
+ gf_smsg(frame->this->name, GF_LOG_INFO, 0, DHT_MSG_CREATE_LINK_FAILED,
+ "path=%s", loc->path, "gfid=%s", gfid, NULL);
goto out;
}
@@ -189,10 +180,9 @@ dht_linkfile_unlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
if (op_ret == -1) {
gf_uuid_unparse(local->loc.gfid, gfid);
- gf_msg(this->name, GF_LOG_INFO, op_errno, DHT_MSG_UNLINK_FAILED,
- "Unlinking linkfile %s (gfid = %s)on "
- "subvolume %s failed ",
- local->loc.path, gfid, subvol->name);
+ gf_smsg(this->name, GF_LOG_INFO, op_errno, DHT_MSG_UNLINK_FAILED,
+ "path=%s", local->loc.path, "gfid=%s", gfid, "subvolume=%s",
+ subvol->name, NULL);
}
DHT_STACK_DESTROY(frame);
@@ -260,7 +250,7 @@ out:
return subvol;
}
-int
+static int
dht_linkfile_setattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno, struct iatt *statpre,
struct iatt *statpost, dict_t *xdata)
@@ -272,10 +262,9 @@ dht_linkfile_setattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
loc = &local->loc;
if (op_ret)
- gf_msg(this->name, GF_LOG_ERROR, op_errno, DHT_MSG_SETATTR_FAILED,
- "Failed to set attr uid/gid on %s"
- " :<gfid:%s> ",
- (loc->path ? loc->path : "NULL"), uuid_utoa(local->gfid));
+ gf_smsg(this->name, GF_LOG_ERROR, op_errno, DHT_MSG_SETATTR_FAILED,
+ "path=%s", (loc->path ? loc->path : "NULL"), "gfid=%s",
+ uuid_utoa(local->gfid), NULL);
DHT_STACK_DESTROY(frame);
diff --git a/xlators/cluster/dht/src/dht-lock.c b/xlators/cluster/dht/src/dht-lock.c
index f9bac4f97c8..638821ccee5 100644
--- a/xlators/cluster/dht/src/dht-lock.c
+++ b/xlators/cluster/dht/src/dht-lock.c
@@ -44,7 +44,8 @@ dht_log_lk_array(char *name, gf_loglevel_t log_level, dht_lock_t **lk_array,
if (!lk_buf)
goto out;
- gf_msg(name, log_level, 0, DHT_MSG_LK_ARRAY_INFO, "%d. %s", i, lk_buf);
+ gf_smsg(name, log_level, 0, DHT_MSG_LK_ARRAY_INFO, "index=%d", i,
+ "lk_buf=%s", lk_buf, NULL);
GF_FREE(lk_buf);
}
@@ -313,11 +314,9 @@ dht_unlock_entrylk_done(call_frame_t *frame, void *cookie, xlator_t *this,
gfid);
if (op_ret < 0) {
- gf_msg(this->name, GF_LOG_WARNING, op_errno,
- DHT_MSG_PARENT_LAYOUT_CHANGED,
- "unlock failed on gfid: %s, stale lock might be left "
- "in DHT_LAYOUT_HEAL_DOMAIN",
- gfid);
+ gf_smsg(this->name, GF_LOG_WARNING, op_errno,
+ DHT_MSG_UNLOCK_GFID_FAILED, "gfid=%s", gfid,
+ "DHT_LAYOUT_HEAL_DOMAIN", NULL);
}
DHT_STACK_DESTROY(frame);
@@ -339,9 +338,10 @@ dht_unlock_entrylk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
uuid_utoa_r(local->lock[0].ns.directory_ns.locks[lk_index]->loc.gfid, gfid);
if (op_ret < 0) {
- gf_msg(this->name, GF_LOG_WARNING, op_errno, DHT_MSG_UNLOCKING_FAILED,
- "unlocking failed on %s:%s",
- local->lock[0].ns.directory_ns.locks[lk_index]->xl->name, gfid);
+ gf_smsg(this->name, GF_LOG_WARNING, op_errno, DHT_MSG_UNLOCKING_FAILED,
+ "name=%s",
+ local->lock[0].ns.directory_ns.locks[lk_index]->xl->name,
+ "gfid=%s", gfid, NULL);
} else {
local->lock[0].ns.directory_ns.locks[lk_index]->locked = 0;
}
@@ -375,9 +375,9 @@ dht_unlock_entrylk(call_frame_t *frame, dht_lock_t **lk_array, int lk_count,
lock_frame = dht_lock_frame(frame);
if (lock_frame == NULL) {
- gf_msg(frame->this->name, GF_LOG_WARNING, 0, DHT_MSG_UNLOCKING_FAILED,
- "cannot allocate a frame, not unlocking following "
- "entrylks:");
+ gf_smsg(frame->this->name, GF_LOG_WARNING, 0,
+ DHT_MSG_ALLOC_FRAME_FAILED_NOT_UNLOCKING_FOLLOWING_ENTRYLKS,
+ NULL);
dht_log_lk_array(frame->this->name, GF_LOG_WARNING, lk_array, lk_count);
goto done;
@@ -385,9 +385,9 @@ dht_unlock_entrylk(call_frame_t *frame, dht_lock_t **lk_array, int lk_count,
ret = dht_local_entrylk_init(lock_frame, lk_array, lk_count, entrylk_cbk);
if (ret < 0) {
- gf_msg(frame->this->name, GF_LOG_WARNING, 0, DHT_MSG_UNLOCKING_FAILED,
- "storing locks in local failed, not unlocking "
- "following entrylks:");
+ gf_smsg(frame->this->name, GF_LOG_WARNING, 0,
+ DHT_MSG_LOCAL_LOCKS_STORE_FAILED_UNLOCKING_FOLLOWING_ENTRYLK,
+ NULL);
dht_log_lk_array(frame->this->name, GF_LOG_WARNING, lk_array, lk_count);
@@ -446,21 +446,17 @@ dht_unlock_entrylk_wrapper(call_frame_t *frame, dht_elock_wrap_t *entrylk)
lock_frame = copy_frame(frame);
if (lock_frame == NULL) {
- gf_msg(frame->this->name, GF_LOG_WARNING, ENOMEM,
- DHT_MSG_PARENT_LAYOUT_CHANGED,
- "mkdir (%s/%s) (path: %s): "
- "copy frame failed",
- pgfid, local->loc.name, local->loc.path);
+ gf_smsg(frame->this->name, GF_LOG_WARNING, ENOMEM,
+ DHT_MSG_COPY_FRAME_FAILED, "pgfid=%s", pgfid, "name=%s",
+ local->loc.name, "path=%s", local->loc.path, NULL);
goto done;
}
lock_local = dht_local_init(lock_frame, NULL, NULL, 0);
if (lock_local == NULL) {
- gf_msg(frame->this->name, GF_LOG_WARNING, ENOMEM,
- DHT_MSG_PARENT_LAYOUT_CHANGED,
- "mkdir (%s/%s) (path: %s): "
- "local creation failed",
- pgfid, local->loc.name, local->loc.path);
+ gf_smsg(frame->this->name, GF_LOG_WARNING, ENOMEM,
+ DHT_MSG_CREATE_FAILED, "local", "pgfid=%s", pgfid, "name=%s",
+ local->loc.name, "path=%s", local->loc.path, NULL);
goto done;
}
@@ -700,9 +696,10 @@ dht_unlock_inodelk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
uuid_utoa_r(local->lock[0].layout.my_layout.locks[lk_index]->loc.gfid,
gfid);
- gf_msg(this->name, GF_LOG_WARNING, op_errno, DHT_MSG_UNLOCKING_FAILED,
- "unlocking failed on %s:%s",
- local->lock[0].layout.my_layout.locks[lk_index]->xl->name, gfid);
+ gf_smsg(this->name, GF_LOG_WARNING, op_errno, DHT_MSG_UNLOCKING_FAILED,
+ "name=%s",
+ local->lock[0].layout.my_layout.locks[lk_index]->xl->name,
+ "gfid=%s", gfid, NULL);
} else {
local->lock[0].layout.my_layout.locks[lk_index]->locked = 0;
}
@@ -727,11 +724,9 @@ dht_unlock_inodelk_done(call_frame_t *frame, void *cookie, xlator_t *this,
gfid);
if (op_ret < 0) {
- gf_msg(this->name, GF_LOG_WARNING, op_errno,
- DHT_MSG_PARENT_LAYOUT_CHANGED,
- "unlock failed on gfid: %s, stale lock might be left "
- "in DHT_LAYOUT_HEAL_DOMAIN",
- gfid);
+ gf_smsg(this->name, GF_LOG_WARNING, op_errno,
+ DHT_MSG_UNLOCK_GFID_FAILED, "DHT_LAYOUT_HEAL_DOMAIN gfid=%s",
+ gfid, NULL);
}
DHT_STACK_DESTROY(frame);
@@ -762,9 +757,9 @@ dht_unlock_inodelk(call_frame_t *frame, dht_lock_t **lk_array, int lk_count,
lock_frame = dht_lock_frame(frame);
if (lock_frame == NULL) {
- gf_msg(frame->this->name, GF_LOG_WARNING, 0, DHT_MSG_UNLOCKING_FAILED,
- "cannot allocate a frame, not unlocking following "
- "locks:");
+ gf_smsg(frame->this->name, GF_LOG_WARNING, 0,
+ DHT_MSG_ALLOC_FRAME_FAILED_NOT_UNLOCKING_FOLLOWING_ENTRYLKS,
+ NULL);
dht_log_lk_array(frame->this->name, GF_LOG_WARNING, lk_array, lk_count);
goto done;
@@ -772,9 +767,9 @@ dht_unlock_inodelk(call_frame_t *frame, dht_lock_t **lk_array, int lk_count,
ret = dht_local_inodelk_init(lock_frame, lk_array, lk_count, inodelk_cbk);
if (ret < 0) {
- gf_msg(frame->this->name, GF_LOG_WARNING, 0, DHT_MSG_UNLOCKING_FAILED,
- "storing locks in local failed, not unlocking "
- "following locks:");
+ gf_smsg(frame->this->name, GF_LOG_WARNING, 0,
+ DHT_MSG_LOCAL_LOCKS_STORE_FAILED_UNLOCKING_FOLLOWING_ENTRYLK,
+ NULL);
dht_log_lk_array(frame->this->name, GF_LOG_WARNING, lk_array, lk_count);
@@ -834,21 +829,17 @@ dht_unlock_inodelk_wrapper(call_frame_t *frame, dht_ilock_wrap_t *inodelk)
lock_frame = copy_frame(frame);
if (lock_frame == NULL) {
- gf_msg(frame->this->name, GF_LOG_WARNING, ENOMEM,
- DHT_MSG_PARENT_LAYOUT_CHANGED,
- "mkdir (%s/%s) (path: %s): "
- "copy frame failed",
- pgfid, local->loc.name, local->loc.path);
+ gf_smsg(frame->this->name, GF_LOG_WARNING, ENOMEM,
+ DHT_MSG_COPY_FRAME_FAILED, "pgfid=%s", pgfid, "name=%s",
+ local->loc.name, "path=%s", local->loc.path, NULL);
goto done;
}
lock_local = dht_local_init(lock_frame, NULL, NULL, 0);
if (lock_local == NULL) {
- gf_msg(frame->this->name, GF_LOG_WARNING, ENOMEM,
- DHT_MSG_PARENT_LAYOUT_CHANGED,
- "mkdir (%s/%s) (path: %s): "
- "local creation failed",
- pgfid, local->loc.name, local->loc.path);
+ gf_smsg(frame->this->name, GF_LOG_WARNING, ENOMEM,
+ DHT_MSG_CREATE_FAILED, "local", "pgfid=%s", pgfid, "name=%s",
+ local->loc.name, "path=%s", local->loc.path, NULL);
goto done;
}
@@ -1039,13 +1030,12 @@ dht_blocking_inodelk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
gfid);
local->lock[0].layout.my_layout.op_ret = -1;
local->lock[0].layout.my_layout.op_errno = op_errno;
- gf_msg(this->name, GF_LOG_ERROR, op_errno,
- DHT_MSG_INODELK_FAILED,
- "inodelk failed on subvol %s. gfid:%s",
- local->lock[0]
- .layout.my_layout.locks[lk_index]
- ->xl->name,
- gfid);
+ gf_smsg(this->name, GF_LOG_ERROR, op_errno,
+ DHT_MSG_INODELK_FAILED, "subvol=%s",
+ local->lock[0]
+ .layout.my_layout.locks[lk_index]
+ ->xl->name,
+ "gfid=%s", gfid, NULL);
goto cleanup;
}
break;
@@ -1060,13 +1050,12 @@ dht_blocking_inodelk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
gfid);
local->lock[0].layout.my_layout.op_ret = -1;
local->lock[0].layout.my_layout.op_errno = op_errno;
- gf_msg(this->name, GF_LOG_ERROR, op_errno,
- DHT_MSG_INODELK_FAILED,
- "inodelk failed on subvol %s. gfid:%s",
- local->lock[0]
- .layout.my_layout.locks[lk_index]
- ->xl->name,
- gfid);
+ gf_smsg(this->name, GF_LOG_ERROR, op_errno,
+ DHT_MSG_INODELK_FAILED, "subvol=%s",
+ local->lock[0]
+ .layout.my_layout.locks[lk_index]
+ ->xl->name,
+ "gfid=%s", gfid, NULL);
goto cleanup;
}
break;
@@ -1077,11 +1066,11 @@ dht_blocking_inodelk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
gfid);
local->lock[0].layout.my_layout.op_ret = -1;
local->lock[0].layout.my_layout.op_errno = op_errno;
- gf_msg(
+ gf_smsg(
this->name, GF_LOG_ERROR, op_errno, DHT_MSG_INODELK_FAILED,
- "inodelk failed on subvol %s, gfid:%s",
+ "subvol=%s",
local->lock[0].layout.my_layout.locks[lk_index]->xl->name,
- gfid);
+ "gfid=%s", gfid, NULL);
goto cleanup;
}
}
@@ -1153,19 +1142,16 @@ dht_blocking_inodelk(call_frame_t *frame, dht_lock_t **lk_array, int lk_count,
lock_frame = dht_lock_frame(frame);
if (lock_frame == NULL) {
gf_uuid_unparse(tmp_local->loc.gfid, gfid);
- gf_msg("dht", GF_LOG_ERROR, ENOMEM, DHT_MSG_LOCK_FRAME_FAILED,
- "memory allocation failed for lock_frame. gfid:%s"
- " path:%s",
- gfid, tmp_local->loc.path);
+ gf_smsg("dht", GF_LOG_ERROR, ENOMEM, DHT_MSG_LOCK_FRAME_FAILED,
+ "gfid=%s", gfid, "path=%s", tmp_local->loc.path, NULL);
goto out;
}
ret = dht_local_inodelk_init(lock_frame, lk_array, lk_count, inodelk_cbk);
if (ret < 0) {
gf_uuid_unparse(tmp_local->loc.gfid, gfid);
- gf_msg("dht", GF_LOG_ERROR, ENOMEM, DHT_MSG_LOCAL_LOCK_INIT_FAILED,
- "dht_local_lock_init failed, gfid: %s path:%s", gfid,
- tmp_local->loc.path);
+ gf_smsg("dht", GF_LOG_ERROR, ENOMEM, DHT_MSG_LOCAL_LOCK_INIT_FAILED,
+ "gfid=%s", gfid, "path=%s", tmp_local->loc.path, NULL);
goto out;
}
@@ -1246,11 +1232,10 @@ dht_blocking_entrylk_after_inodelk(call_frame_t *frame, void *cookie,
if (ret < 0) {
local->op_ret = -1;
local->op_errno = EIO;
- gf_msg(this->name, GF_LOG_WARNING, local->op_errno,
- DHT_MSG_ENTRYLK_ERROR,
- "%s (%s/%s): "
- "dht_blocking_entrylk failed after taking inodelk",
- gf_fop_list[local->fop], pgfid, entrylk->locks[0]->basename);
+ gf_smsg(this->name, GF_LOG_WARNING, local->op_errno,
+ DHT_MSG_ENTRYLK_FAILED_AFT_INODELK, "fop=%s",
+ gf_fop_list[local->fop], "pgfid=%s", pgfid, "basename=%s",
+ entrylk->locks[0]->basename, NULL);
goto err;
}
@@ -1310,10 +1295,9 @@ dht_protect_namespace(call_frame_t *frame, loc_t *loc, xlator_t *subvol,
ret = dht_build_parent_loc(this, &parent, loc, &op_errno);
if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, op_errno, DHT_MSG_LOC_FAILED,
- "gfid:%s (name:%s) (path: %s): "
- "parent loc build failed",
- loc->gfid, loc->name, loc->path);
+ gf_smsg(this->name, GF_LOG_ERROR, op_errno, DHT_MSG_LOC_FAILED,
+ "gfid=%s", loc->gfid, "name=%s", loc->name, "path=%s",
+ loc->path, NULL);
goto out;
}
gf_uuid_unparse(parent.gfid, pgfid);
@@ -1322,10 +1306,10 @@ dht_protect_namespace(call_frame_t *frame, loc_t *loc, xlator_t *subvol,
inodelk->locks = GF_CALLOC(count, sizeof(*lk_array), gf_common_mt_pointer);
if (inodelk->locks == NULL) {
local->op_errno = ENOMEM;
- gf_msg(this->name, GF_LOG_WARNING, local->op_errno, DHT_MSG_NO_MEMORY,
- "%s (%s/%s) (path: %s): "
- "calloc failure",
- gf_fop_list[local->fop], pgfid, loc->name, loc->path);
+ gf_smsg(this->name, GF_LOG_WARNING, local->op_errno,
+ DHT_MSG_CALLOC_FAILED, "fop=%s", gf_fop_list[local->fop],
+ "pgfid=%s", pgfid, "name=%s", loc->name, "path=%s", loc->path,
+ NULL);
goto out;
}
@@ -1334,10 +1318,10 @@ dht_protect_namespace(call_frame_t *frame, loc_t *loc, xlator_t *subvol,
FAIL_ON_ANY_ERROR);
if (inodelk->locks[0] == NULL) {
local->op_errno = ENOMEM;
- gf_msg(this->name, GF_LOG_WARNING, local->op_errno, DHT_MSG_NO_MEMORY,
- "%s (%s/%s) (path: %s): "
- "inodelk: lock allocation failed",
- gf_fop_list[local->fop], pgfid, loc->name, loc->path);
+ gf_smsg(this->name, GF_LOG_WARNING, local->op_errno,
+ DHT_MSG_LOCK_ALLOC_FAILED, "inodelk-fop=%s",
+ gf_fop_list[local->fop], "pgfid=%s", pgfid, "name=%s",
+ loc->name, "path=%s", loc->path, NULL);
goto err;
}
inodelk->lk_count = count;
@@ -1346,10 +1330,10 @@ dht_protect_namespace(call_frame_t *frame, loc_t *loc, xlator_t *subvol,
entrylk->locks = GF_CALLOC(count, sizeof(*lk_array), gf_common_mt_pointer);
if (entrylk->locks == NULL) {
local->op_errno = ENOMEM;
- gf_msg(this->name, GF_LOG_WARNING, local->op_errno, DHT_MSG_NO_MEMORY,
- "%s (%s/%s) (path: %s): "
- "entrylk: calloc failure",
- gf_fop_list[local->fop], pgfid, loc->name, loc->path);
+ gf_smsg(this->name, GF_LOG_WARNING, local->op_errno,
+ DHT_MSG_CALLOC_FAILED, "entrylk-fop=%s",
+ gf_fop_list[local->fop], "pgfid=%s", pgfid, "name=%s",
+ loc->name, "path=%s", loc->path, NULL);
goto err;
}
@@ -1359,10 +1343,10 @@ dht_protect_namespace(call_frame_t *frame, loc_t *loc, xlator_t *subvol,
FAIL_ON_ANY_ERROR);
if (entrylk->locks[0] == NULL) {
local->op_errno = ENOMEM;
- gf_msg(this->name, GF_LOG_WARNING, local->op_errno, DHT_MSG_NO_MEMORY,
- "%s (%s/%s) (path: %s): "
- "entrylk: lock allocation failed",
- gf_fop_list[local->fop], pgfid, loc->name, loc->path);
+ gf_smsg(this->name, GF_LOG_WARNING, local->op_errno,
+ DHT_MSG_LOCK_ALLOC_FAILED, "entrylk-fop=%s",
+ gf_fop_list[local->fop], "pgfid=%s", pgfid, "name=%s",
+ loc->name, "path=%s", loc->path, NULL);
goto err;
}
@@ -1376,11 +1360,11 @@ dht_protect_namespace(call_frame_t *frame, loc_t *loc, xlator_t *subvol,
dht_blocking_entrylk_after_inodelk);
if (ret < 0) {
local->op_errno = EIO;
- gf_msg(this->name, GF_LOG_WARNING, local->op_errno,
- DHT_MSG_INODELK_ERROR,
- "%s (%s/%s) (path: %s): "
- "dht_blocking_inodelk failed",
- gf_fop_list[local->fop], pgfid, loc->name, loc->path);
+ gf_smsg(this->name, GF_LOG_WARNING, local->op_errno,
+ DHT_MSG_BLOCK_INODELK_FAILED, "fop=%s", gf_fop_list[local->fop],
+ "pgfid=%s", pgfid, "name=%s", loc->name, "path=%s", loc->path,
+ NULL);
+
goto err;
}
diff --git a/xlators/cluster/dht/src/dht-lock.h b/xlators/cluster/dht/src/dht-lock.h
index 802970adb3b..6485c03fb6e 100644
--- a/xlators/cluster/dht/src/dht-lock.h
+++ b/xlators/cluster/dht/src/dht-lock.h
@@ -11,7 +11,6 @@
#ifndef _DHT_LOCK_H
#define _DHT_LOCK_H
-#include "xlator.h"
#include "dht-common.h"
void
diff --git a/xlators/cluster/dht/src/dht-mem-types.h b/xlators/cluster/dht/src/dht-mem-types.h
index 5b728f86c95..e3c4471334a 100644
--- a/xlators/cluster/dht/src/dht-mem-types.h
+++ b/xlators/cluster/dht/src/dht-mem-types.h
@@ -11,7 +11,7 @@
#ifndef __DHT_MEM_TYPES_H__
#define __DHT_MEM_TYPES_H__
-#include "mem-types.h"
+#include <glusterfs/mem-types.h>
enum gf_dht_mem_types_ {
gf_dht_mt_dht_du_t = gf_common_mt_end + 1,
@@ -20,23 +20,17 @@ enum gf_dht_mem_types_ {
gf_dht_mt_int32_t,
gf_dht_mt_xlator_t,
gf_dht_mt_dht_layout_t,
- gf_switch_mt_dht_conf_t,
- gf_switch_mt_dht_du_t,
gf_switch_mt_switch_sched_array,
gf_switch_mt_switch_struct,
gf_dht_mt_subvol_time,
gf_dht_mt_loc_t,
gf_defrag_info_mt,
gf_dht_mt_inode_ctx_t,
- gf_dht_mt_ctx_stat_time_t,
gf_dht_mt_dirent_t,
gf_dht_mt_container_t,
gf_dht_mt_octx_t,
gf_dht_mt_miginfo_t,
- gf_tier_mt_bricklist_t,
- gf_tier_mt_ipc_ctr_params_t,
gf_dht_mt_fd_ctx_t,
- gf_tier_mt_qfile_array_t,
gf_dht_ret_cache_t,
gf_dht_nodeuuids_t,
gf_dht_mt_end
diff --git a/xlators/cluster/dht/src/dht-messages.h b/xlators/cluster/dht/src/dht-messages.h
index 005ab57b505..601f8dad78b 100644
--- a/xlators/cluster/dht/src/dht-messages.h
+++ b/xlators/cluster/dht/src/dht-messages.h
@@ -10,7 +10,7 @@
#ifndef _DHT_MESSAGES_H_
#define _DHT_MESSAGES_H_
-#include "glfs-message-id.h"
+#include <glusterfs/glfs-message-id.h>
/* To add new message IDs, append new identifiers at the end of the list.
*
@@ -38,12 +38,11 @@ GLFS_MSGID(
DHT_MSG_REBALANCE_STATUS, DHT_MSG_REBALANCE_STOPPED, DHT_MSG_RENAME_FAILED,
DHT_MSG_SETATTR_FAILED, DHT_MSG_SUBVOL_INSUFF_INODES,
DHT_MSG_SUBVOL_INSUFF_SPACE, DHT_MSG_UNLINK_FAILED,
- DHT_MSG_LAYOUT_SET_FAILED, DHT_MSG_LOG_FIXED_LAYOUT, DHT_MSG_LOG_TIER_ERROR,
- DHT_MSG_LOG_TIER_STATUS, DHT_MSG_GET_XATTR_FAILED,
- DHT_MSG_FILE_LOOKUP_FAILED, DHT_MSG_OPEN_FD_FAILED,
- DHT_MSG_SET_INODE_CTX_FAILED, DHT_MSG_UNLOCKING_FAILED,
- DHT_MSG_DISK_LAYOUT_NULL, DHT_MSG_SUBVOL_INFO, DHT_MSG_CHUNK_SIZE_INFO,
- DHT_MSG_LAYOUT_FORM_FAILED, DHT_MSG_SUBVOL_ERROR,
+ DHT_MSG_LAYOUT_SET_FAILED, DHT_MSG_LOG_FIXED_LAYOUT,
+ DHT_MSG_GET_XATTR_FAILED, DHT_MSG_FILE_LOOKUP_FAILED,
+ DHT_MSG_OPEN_FD_FAILED, DHT_MSG_SET_INODE_CTX_FAILED,
+ DHT_MSG_UNLOCKING_FAILED, DHT_MSG_DISK_LAYOUT_NULL, DHT_MSG_SUBVOL_INFO,
+ DHT_MSG_CHUNK_SIZE_INFO, DHT_MSG_LAYOUT_FORM_FAILED, DHT_MSG_SUBVOL_ERROR,
DHT_MSG_LAYOUT_SORT_FAILED, DHT_MSG_REGEX_INFO, DHT_MSG_FOPEN_FAILED,
DHT_MSG_SET_HOSTNAME_FAILED, DHT_MSG_BRICK_ERROR, DHT_MSG_SYNCOP_FAILED,
DHT_MSG_MIGRATE_INFO, DHT_MSG_SOCKET_ERROR, DHT_MSG_CREATE_FD_FAILED,
@@ -69,8 +68,7 @@ GLFS_MSGID(
DHT_MSG_INIT_LOCAL_SUBVOL_FAILED, DHT_MSG_SYS_CALL_GET_TIME_FAILED,
DHT_MSG_NO_DISK_USAGE_STATUS, DHT_MSG_SUBVOL_DOWN_ERROR,
DHT_MSG_REBAL_THROTTLE_INFO, DHT_MSG_COMMIT_HASH_INFO,
- DHT_MSG_REBAL_STRUCT_SET, DHT_MSG_HAS_MIGINFO, DHT_MSG_LOG_IPC_TIER_ERROR,
- DHT_MSG_TIER_PAUSED, DHT_MSG_TIER_RESUME, DHT_MSG_SETTLE_HASH_FAILED,
+ DHT_MSG_REBAL_STRUCT_SET, DHT_MSG_HAS_MIGINFO, DHT_MSG_SETTLE_HASH_FAILED,
DHT_MSG_DEFRAG_PROCESS_DIR_FAILED, DHT_MSG_FD_CTX_SET_FAILED,
DHT_MSG_STALE_LOOKUP, DHT_MSG_PARENT_LAYOUT_CHANGED,
DHT_MSG_LOCK_MIGRATION_FAILED, DHT_MSG_LOCK_INODE_UNREF_FAILED,
@@ -79,6 +77,310 @@ GLFS_MSGID(
DHT_MSG_ENTRYLK_ERROR, DHT_MSG_INODELK_ERROR, DHT_MSG_LOC_FAILED,
DHT_MSG_UNKNOWN_FOP, DHT_MSG_MIGRATE_FILE_SKIPPED,
DHT_MSG_DIR_XATTR_HEAL_FAILED, DHT_MSG_HASHED_SUBVOL_DOWN,
- DHT_MSG_NON_HASHED_SUBVOL_DOWN);
+ DHT_MSG_NON_HASHED_SUBVOL_DOWN, DHT_MSG_SYNCTASK_CREATE_FAILED,
+ DHT_MSG_DIR_HEAL_ABORT, DHT_MSG_MIGRATE_SKIP, DHT_MSG_FD_CREATE_FAILED,
+ DHT_MSG_DICT_NEW_FAILED, DHT_MSG_FAILED_TO_OPEN, DHT_MSG_CREATE_FAILED,
+ DHT_MSG_FILE_NOT_EXIST, DHT_MSG_CHOWN_FAILED, DHT_MSG_FALLOCATE_FAILED,
+ DHT_MSG_FTRUNCATE_FAILED, DHT_MSG_STATFS_FAILED, DHT_MSG_WRITE_CROSS,
+ DHT_MSG_NEW_TARGET_FOUND, DHT_MSG_INSUFF_MEMORY, DHT_MSG_SET_XATTR_FAILED,
+ DHT_MSG_SET_MODE_FAILED, DHT_MSG_FILE_EXISTS_IN_DEST,
+ DHT_MSG_SYMLINK_FAILED, DHT_MSG_LINKFILE_DEL_FAILED, DHT_MSG_MKNOD_FAILED,
+ DHT_MSG_MIGRATE_CLEANUP_FAILED, DHT_MSG_LOCK_MIGRATE,
+ DHT_MSG_PARENT_BUILD_FAILED, DHT_MSG_HASHED_SUBVOL_NOT_FOUND,
+ DHT_MSG_ACQUIRE_ENTRYLK_FAILED, DHT_MSG_CREATE_DST_FAILED,
+ DHT_MSG_MIGRATION_EXIT, DHT_MSG_CHANGED_DST, DHT_MSG_TRACE_FAILED,
+ DHT_MSG_WRITE_LOCK_FAILED, DHT_MSG_GETACTIVELK_FAILED, DHT_MSG_STAT_FAILED,
+ DHT_MSG_UNLINK_PERFORM_FAILED, DHT_MSG_CLANUP_SOURCE_FILE_FAILED,
+ DHT_MSG_UNLOCK_FILE_FAILED, DHT_MSG_REMOVE_XATTR_FAILED,
+ DHT_MSG_DATA_MIGRATE_ABORT, DHT_MSG_DEFRAG_NULL, DHT_MSG_PARENT_NULL,
+ DHT_MSG_GFID_NOT_PRESENT, DHT_MSG_CHILD_LOC_FAILED,
+ DHT_MSG_SET_LOOKUP_FAILED, DHT_MSG_DIR_REMOVED, DHT_MSG_FIX_NOT_COMP,
+ DHT_MSG_SUBVOL_DETER_FAILED, DHT_MSG_LOCAL_SUBVOL, DHT_MSG_NODE_UUID,
+ DHT_MSG_SIZE_FILE, DHT_MSG_GET_DATA_SIZE_FAILED,
+ DHT_MSG_PTHREAD_JOIN_FAILED, DHT_MSG_COUNTER_THREAD_CREATE_FAILED,
+ DHT_MSG_MIGRATION_INIT_QUEUE_FAILED, DHT_MSG_PAUSED_TIMEOUT, DHT_MSG_WOKE,
+ DHT_MSG_ABORT_REBALANCE, DHT_MSG_CREATE_TASK_REBAL_FAILED,
+ DHT_MSG_REBAL_ESTIMATE_NOT_AVAIL, DHT_MSG_ADD_CHOICES_ERROR,
+ DHT_MSG_GET_CHOICES_ERROR, DHT_MSG_PREPARE_STATUS_ERROR,
+ DHT_MSG_SET_CHOICE_FAILED, DHT_MSG_SET_HASHED_SUBVOL_FAILED,
+ DHT_MSG_XATTR_HEAL_NOT_POSS, DHT_MSG_LINKTO_FILE_FAILED,
+ DHT_MSG_STALE_LINKFILE_DELETE, DHT_MSG_NO_SUBVOL_FOR_LINKTO,
+ DHT_MSG_SUBVOL_RETURNED, DHT_MSG_UNKNOWN_LOCAL_XSEL, DHT_MSG_GET_XATTR_ERR,
+ DHT_MSG_ALLOC_OR_FILL_FAILED, DHT_MSG_GET_REAL_NAME_FAILED,
+ DHT_MSG_COPY_UUID_FAILED, DHT_MSG_MDS_DETER_FAILED,
+ DHT_MSG_CREATE_REBAL_FAILED, DHT_MSG_LINK_LAYOUT_FAILED,
+ DHT_MSG_NO_SUBVOL_IN_LAYOUT, DHT_MSG_MEM_ALLOC_FAILED,
+ DHT_MSG_SET_IN_PARAMS_DICT_FAILED, DHT_MSG_LOC_COPY_FAILED,
+ DHT_MSG_PARENT_LOC_FAILED, DHT_MSG_CREATE_LOCK_FAILED,
+ DHT_MSG_PREV_ATTEMPT_FAILED, DHT_MSG_REFRESH_ATTEMPT,
+ DHT_MSG_ACQUIRE_LOCK_FAILED, DHT_MSG_CREATE_STUB_FAILED,
+ DHT_MSG_WIND_LOCK_REQ_FAILED, DHT_MSG_REFRESH_FAILED,
+ DHT_MSG_CACHED_SUBVOL_ERROR, DHT_MSG_NO_LINK_SUBVOL, DHT_MSG_SET_KEY_FAILED,
+ DHT_MSG_REMOVE_LINKTO_FAILED, DHT_MSG_LAYOUT_DICT_SET_FAILED,
+ DHT_MSG_XATTR_DICT_NULL, DHT_MSG_DUMMY_ALLOC_FAILED, DHT_MSG_DICT_IS_NULL,
+ DHT_MSG_LINK_INODE_FAILED, DHT_MSG_SELFHEAL_FAILED, DHT_MSG_NO_MDS_SUBVOL,
+ DHT_MSG_LIST_XATTRS_FAILED, DHT_MSG_RESET_INTER_XATTR_FAILED,
+ DHT_MSG_MDS_DOWN_UNABLE_TO_SET, DHT_MSG_WIND_UNLOCK_FAILED,
+ DHT_MSG_COMMIT_HASH_FAILED, DHT_MSG_UNLOCK_GFID_FAILED,
+ DHT_MSG_UNLOCK_FOLLOW_ENTRYLK, DHT_MSG_COPY_FRAME_FAILED,
+ DHT_MSG_UNLOCK_FOLLOW_LOCKS, DHT_MSG_ENTRYLK_FAILED_AFT_INODELK,
+ DHT_MSG_CALLOC_FAILED, DHT_MSG_LOCK_ALLOC_FAILED,
+ DHT_MSG_BLOCK_INODELK_FAILED,
+ DHT_MSG_LOCAL_LOCKS_STORE_FAILED_UNLOCKING_FOLLOWING_ENTRYLK,
+ DHT_MSG_ALLOC_FRAME_FAILED_NOT_UNLOCKING_FOLLOWING_ENTRYLKS,
+ DHT_MSG_DST_NULL_SET_FAILED);
+
+#define DHT_MSG_FD_CTX_SET_FAILED_STR "Failed to set fd ctx"
+#define DHT_MSG_INVALID_VALUE_STR "Different dst found in the fd ctx"
+#define DHT_MSG_UNKNOWN_FOP_STR "Unknown FOP on file"
+#define DHT_MSG_OPEN_FD_ON_DST_FAILED_STR "Failed to open the fd on file"
+#define DHT_MSG_SYNCTASK_CREATE_FAILED_STR "Failed to create synctask"
+#define DHT_MSG_ASPRINTF_FAILED_STR \
+ "asprintf failed while fetching subvol from the id"
+#define DHT_MSG_HAS_MIGINFO_STR "Found miginfo in the inode ctx"
+#define DHT_MSG_FILE_LOOKUP_FAILED_STR "failed to lookup the file"
+#define DHT_MSG_INVALID_LINKFILE_STR \
+ "linkto target is different from cached-subvol. treating as destination " \
+ "subvol"
+#define DHT_MSG_GFID_MISMATCH_STR "gfid different on the target file"
+#define DHT_MSG_GET_XATTR_FAILED_STR "failed to get 'linkto' xattr"
+#define DHT_MSG_SET_INODE_CTX_FAILED_STR "failed to set inode-ctx target file"
+#define DHT_MSG_DIR_SELFHEAL_FAILED_STR "Healing of path failed"
+#define DHT_MSG_DIR_HEAL_ABORT_STR \
+ "Failed to get path from subvol. Aborting directory healing"
+#define DHT_MSG_DIR_XATTR_HEAL_FAILED_STR "xattr heal failed for directory"
+#define DHT_MSG_LOCK_INODE_UNREF_FAILED_STR \
+ "Found a NULL inode. Failed to unref the inode"
+#define DHT_MSG_DICT_SET_FAILED_STR "Failed to set dictionary value"
+#define DHT_MSG_NOT_LINK_FILE_ERROR_STR "got non-linkfile"
+#define DHT_MSG_CREATE_LINK_FAILED_STR "failed to initialize linkfile data"
+#define DHT_MSG_UNLINK_FAILED_STR "Unlinking linkfile on subvolume failed"
+#define DHT_MSG_MIGRATE_FILE_FAILED_STR "Migrate file failed"
+#define DHT_MSG_NO_MEMORY_STR "could not allocate memory for dict"
+#define DHT_MSG_SUBVOL_ERROR_STR "Failed to get linkto subvol"
+#define DHT_MSG_MIGRATE_HARDLINK_FILE_FAILED_STR "link failed on subvol"
+#define DHT_MSG_MIGRATE_FILE_SKIPPED_STR "Migration skipped"
+#define DHT_MSG_FD_CREATE_FAILED_STR "fd create failed"
+#define DHT_MSG_DICT_NEW_FAILED_STR "dict_new failed"
+#define DHT_MSG_FAILED_TO_OPEN_STR "failed to open"
+#define DHT_MSG_CREATE_FAILED_STR "failed to create"
+#define DHT_MSG_FILE_NOT_EXIST_STR "file does not exist"
+#define DHT_MSG_CHOWN_FAILED_STR "chown failed"
+#define DHT_MSG_FALLOCATE_FAILED_STR "fallocate failed"
+#define DHT_MSG_FTRUNCATE_FAILED_STR "ftruncate failed"
+#define DHT_MSG_STATFS_FAILED_STR "failed to get statfs"
+#define DHT_MSG_WRITE_CROSS_STR \
+ "write will cross min-free-disk for file on subvol. looking for new subvol"
+#define DHT_MSG_SUBVOL_INSUFF_SPACE_STR \
+ "Could not find any subvol with space accommodating the file. Consider " \
+ "adding bricks"
+#define DHT_MSG_NEW_TARGET_FOUND_STR "New target found for file"
+#define DHT_MSG_INSUFF_MEMORY_STR "insufficient memory"
+#define DHT_MSG_SET_XATTR_FAILED_STR "failed to set xattr"
+#define DHT_MSG_SET_MODE_FAILED_STR "failed to set mode"
+#define DHT_MSG_FILE_EXISTS_IN_DEST_STR "file exists in destination"
+#define DHT_MSG_LINKFILE_DEL_FAILED_STR "failed to delete the linkfile"
+#define DHT_MSG_SYMLINK_FAILED_STR "symlink failed"
+#define DHT_MSG_MKNOD_FAILED_STR "mknod failed"
+#define DHT_MSG_SETATTR_FAILED_STR "failed to perform setattr"
+#define DHT_MSG_MIGRATE_CLEANUP_FAILED_STR \
+ "Migrate file cleanup failed: failed to fstat file"
+#define DHT_MSG_LOCK_MIGRATE_STR "locks will be migrated for file"
+#define DHT_MSG_PARENT_BUILD_FAILED_STR \
+ "failed to build parent loc, which is needed to acquire entrylk to " \
+ "synchronize with renames on this path. Skipping migration"
+#define DHT_MSG_HASHED_SUBVOL_NOT_FOUND_STR \
+ "cannot find hashed subvol which is needed to synchronize with renames " \
+ "on this path. Skipping migration"
+#define DHT_MSG_ACQUIRE_ENTRYLK_FAILED_STR "failed to acquire entrylk on subvol"
+#define DHT_MSG_CREATE_DST_FAILED_STR "create dst failed for file"
+#define DHT_MSG_MIGRATION_EXIT_STR "Exiting migration"
+#define DHT_MSG_CHANGED_DST_STR "destination changed for file"
+#define DHT_MSG_TRACE_FAILED_STR "Trace failed"
+#define DHT_MSG_WRITE_LOCK_FAILED_STR "write lock failed"
+#define DHT_MSG_GETACTIVELK_FAILED_STR "getactivelk failed for file"
+#define DHT_MSG_STAT_FAILED_STR "failed to do a stat"
+#define DHT_MSG_UNLINK_PERFORM_FAILED_STR "failed to perform unlink"
+#define DHT_MSG_MIGRATE_FILE_COMPLETE_STR "completed migration"
+#define DHT_MSG_CLANUP_SOURCE_FILE_FAILED_STR "failed to cleanup source file"
+#define DHT_MSG_UNLOCK_FILE_FAILED_STR "failed to unlock file"
+#define DHT_MSG_REMOVE_XATTR_FAILED_STR "remove xattr failed"
+#define DHT_MSG_SOCKET_ERROR_STR "Failed to unlink listener socket"
+#define DHT_MSG_HASHED_SUBVOL_GET_FAILED_STR "Failed to get hashed subvolume"
+#define DHT_MSG_CACHED_SUBVOL_GET_FAILED_STR "Failed to get cached subvolume"
+#define DHT_MSG_MIGRATE_DATA_FAILED_STR "migrate-data failed"
+#define DHT_MSG_DEFRAG_NULL_STR "defrag is NULL"
+#define DHT_MSG_DATA_MIGRATE_ABORT_STR \
+ "Readdirp failed. Aborting data migration for dict"
+#define DHT_MSG_LAYOUT_FIX_FAILED_STR "fix layout failed"
+#define DHT_MSG_PARENT_NULL_STR "parent is NULL"
+#define DHT_MSG_GFID_NOT_PRESENT_STR "gfid not present"
+#define DHT_MSG_CHILD_LOC_FAILED_STR "Child loc build failed"
+#define DHT_MSG_SET_LOOKUP_FAILED_STR "Failed to set lookup"
+#define DHT_MSG_DIR_LOOKUP_FAILED_STR "lookup failed"
+#define DHT_MSG_DIR_REMOVED_STR "Dir renamed or removed. Skipping"
+#define DHT_MSG_READDIR_ERROR_STR "readdir failed, Aborting fix-layout"
+#define DHT_MSG_SETTLE_HASH_FAILED_STR "Settle hash failed"
+#define DHT_MSG_DEFRAG_PROCESS_DIR_FAILED_STR "gf_defrag_process_dir failed"
+#define DHT_MSG_FIX_NOT_COMP_STR \
+ "Unable to retrieve fixlayout xattr. Assume background fix layout not " \
+ "complete"
+#define DHT_MSG_SUBVOL_DETER_FAILED_STR \
+ "local subvolume determination failed with error"
+#define DHT_MSG_LOCAL_SUBVOL_STR "local subvol"
+#define DHT_MSG_NODE_UUID_STR "node uuid"
+#define DHT_MSG_SIZE_FILE_STR "Total size files"
+#define DHT_MSG_GET_DATA_SIZE_FAILED_STR \
+ "Failed to get the total data size. Unable to estimate time to complete " \
+ "rebalance"
+#define DHT_MSG_PTHREAD_JOIN_FAILED_STR \
+ "file_counter_thread: pthread_join failed"
+#define DHT_MSG_COUNTER_THREAD_CREATE_FAILED_STR \
+ "Failed to create the file counter thread"
+#define DHT_MSG_MIGRATION_INIT_QUEUE_FAILED_STR \
+ "Failed to initialise migration queue"
+#define DHT_MSG_REBALANCE_STOPPED_STR "Received stop command on rebalance"
+#define DHT_MSG_PAUSED_TIMEOUT_STR "Request pause timer timeout"
+#define DHT_MSG_WOKE_STR "woken"
+#define DHT_MSG_ABORT_REBALANCE_STR "Aborting rebalance"
+#define DHT_MSG_REBALANCE_START_FAILED_STR \
+ "Failed to start rebalance: look up on / failed"
+#define DHT_MSG_CREATE_TASK_REBAL_FAILED_STR \
+ "Could not create task for rebalance"
+#define DHT_MSG_REBAL_ESTIMATE_NOT_AVAIL_STR \
+ "Rebalance estimates will not be available"
+#define DHT_MSG_REBALANCE_STATUS_STR "Rebalance status"
+#define DHT_MSG_DATA_NULL_STR "data value is NULL"
+#define DHT_MSG_ADD_CHOICES_ERROR_STR "Error to add choices in buffer"
+#define DHT_MSG_GET_CHOICES_ERROR_STR "Error to get choices"
+#define DHT_MSG_PREPARE_STATUS_ERROR_STR "Error to prepare status"
+#define DHT_MSG_SET_CHOICE_FAILED_STR "Failed to set full choice"
+#define DHT_MSG_AGGREGATE_QUOTA_XATTR_FAILED_STR \
+ "Failed to aggregate quota xattr"
+#define DHT_MSG_FILE_TYPE_MISMATCH_STR \
+ "path exists as a file on one subvolume and directory on another. Please " \
+ "fix it manually"
+#define DHT_MSG_LAYOUT_SET_FAILED_STR "failed to set layout for subvolume"
+#define DHT_MSG_LAYOUT_MERGE_FAILED_STR "failed to merge layouts for subvolume"
+#define DHT_MSG_SET_HASHED_SUBVOL_FAILED_STR "Failed to set hashed subvolume"
+#define DHT_MSG_XATTR_HEAL_NOT_POSS_STR \
+ "No gfid exists for path. so healing xattr is not possible"
+#define DHT_MSG_REVALIDATE_CBK_INFO_STR "Revalidate: subvolume returned -1"
+#define DHT_MSG_LAYOUT_MISMATCH_STR "Mismatching layouts"
+#define DHT_MSG_UNLINK_LOOKUP_INFO_STR "lookup_unlink returned"
+#define DHT_MSG_LINKTO_FILE_FAILED_STR \
+ "Could not unlink the linkto file as either fd is open and/or linkto " \
+ "xattr is set"
+#define DHT_MSG_LAYOUT_PRESET_FAILED_STR \
+ "Could not set pre-set layout for subvolume"
+#define DHT_MSG_FILE_ON_MULT_SUBVOL_STR \
+ "multiple subvolumes have file (preferably rename the file in the " \
+ "backend, and do a fresh lookup)"
+#define DHT_MSG_STALE_LINKFILE_DELETE_STR \
+ "attempting deletion of stale linkfile"
+#define DHT_MSG_LINK_FILE_LOOKUP_INFO_STR "Lookup on following linkfile"
+#define DHT_MSG_NO_SUBVOL_FOR_LINKTO_STR "No link subvolume for linkto"
+#define DHT_MSG_SUBVOL_RETURNED_STR "Subvolume returned -1"
+#define DHT_MSG_UNKNOWN_LOCAL_XSEL_STR "Unknown local->xsel"
+#define DHT_MSG_DICT_GET_FAILED_STR "Failed to get"
+#define DHT_MSG_UUID_PARSE_ERROR_STR "Failed to parse uuid"
+#define DHT_MSG_GET_XATTR_ERR_STR "getxattr err for dir"
+#define DHT_MSG_ALLOC_OR_FILL_FAILED_STR "alloc or fill failed"
+#define DHT_MSG_UPGRADE_BRICKS_STR \
+ "At least one of the bricks does not support this operation. Please " \
+ "upgrade all bricks"
+#define DHT_MSG_GET_REAL_NAME_FAILED_STR "Failed to get real filename"
+#define DHT_MSG_LAYOUT_NULL_STR "Layout is NULL"
+#define DHT_MSG_COPY_UUID_FAILED_STR "Failed to copy node uuid key"
+#define DHT_MSG_MDS_DETER_FAILED_STR \
+ "Cannot determine MDS, fetching xattr randomly from a subvol"
+#define DHT_MSG_HASHED_SUBVOL_DOWN_STR \
+ "MDS is down for path, so fetching xattr randomly from subvol"
+#define DHT_MSG_CREATE_REBAL_FAILED_STR \
+ "failed to create a new rebalance synctask"
+#define DHT_MSG_FIX_LAYOUT_INFO_STR "fixing the layout"
+#define DHT_MSG_OPERATION_NOT_SUP_STR "wrong directory-spread-count value"
+#define DHT_MSG_LINK_LAYOUT_FAILED_STR "failed to link the layout in inode"
+#define DHT_MSG_NO_SUBVOL_IN_LAYOUT_STR "no subvolume in layout for path"
+#define DHT_MSG_INODE_LK_ERROR_STR "mknod lock failed for file"
+#define DHT_MSG_MEM_ALLOC_FAILED_STR "mem allocation failed"
+#define DHT_MSG_PARENT_LAYOUT_CHANGED_STR \
+ "extracting in-memory layout of parent failed"
+#define DHT_MSG_SET_IN_PARAMS_DICT_FAILED_STR \
+ "setting in params dictionary failed"
+#define DHT_MSG_LOC_COPY_FAILED_STR "loc_copy failed"
+#define DHT_MSG_LOC_FAILED_STR "parent loc build failed"
+#define DHT_MSG_PARENT_LOC_FAILED_STR "locking parent failed"
+#define DHT_MSG_CREATE_LOCK_FAILED_STR "Create lock failed"
+#define DHT_MSG_PREV_ATTEMPT_FAILED_STR \
+ "mkdir loop detected. parent layout didn't change even though previous " \
+ "attempt of mkdir failed because of in-memory layout not matching with " \
+ "that on disk."
+#define DHT_MSG_REFRESH_ATTEMPT_STR \
+ "mkdir parent layout changed. Attempting a refresh and then a retry"
+#define DHT_MSG_ACQUIRE_LOCK_FAILED_STR \
+ "Acquiring lock on parent to guard against layout-change failed"
+#define DHT_MSG_CREATE_STUB_FAILED_STR "creating stub failed"
+#define DHT_MSG_WIND_LOCK_REQ_FAILED_STR \
+ "cannot wind lock request to guard parent layout"
+#define DHT_MSG_REFRESH_FAILED_STR "refreshing parent layout failed."
+#define DHT_MSG_CACHED_SUBVOL_ERROR_STR "On cached subvol"
+#define DHT_MSG_NO_LINK_SUBVOL_STR "Linkfile does not have link subvolume"
+#define DHT_MSG_SET_KEY_FAILED_STR "failed to set key"
+#define DHT_MSG_CHILD_DOWN_STR "Received CHILD_DOWN. Exiting"
+#define DHT_MSG_LOG_FIXED_LAYOUT_STR "log layout fixed"
+#define DHT_MSG_REBAL_STRUCT_SET_STR "local->rebalance already set"
+#define DHT_MSG_REMOVE_LINKTO_FAILED_STR "Removal of linkto failed at subvol"
+#define DHT_MSG_LAYOUT_DICT_SET_FAILED_STR "dht layout dict set failed"
+#define DHT_MSG_SUBVOL_INFO_STR "creating subvolume"
+#define DHT_MSG_COMPUTE_HASH_FAILED_STR "hash computation failed"
+#define DHT_MSG_INVALID_DISK_LAYOUT_STR \
+ "Invalid disk layout: Catastrophic error layout with unknown type found"
+#define DHT_MSG_LAYOUT_SORT_FAILED_STR "layout sort failed"
+#define DHT_MSG_ANOMALIES_INFO_STR "Found anomalies"
+#define DHT_MSG_XATTR_DICT_NULL_STR "xattr dictionary is NULL"
+#define DHT_MSG_DISK_LAYOUT_MISSING_STR "Disk layout missing"
+#define DHT_MSG_LAYOUT_INFO_STR "layout info"
+#define DHT_MSG_SUBVOL_NO_LAYOUT_INFO_STR "no pre-set layout for subvol"
+#define DHT_MSG_SELFHEAL_XATTR_FAILED_STR "layout setxattr failed"
+#define DHT_MSG_DIR_SELFHEAL_XATTR_FAILED_STR "Directory self heal xattr failed"
+#define DHT_MSG_DUMMY_ALLOC_FAILED_STR "failed to allocate dummy layout"
+#define DHT_MSG_DICT_IS_NULL_STR \
+ "dict is NULL, need to make sure gfids are same"
+#define DHT_MSG_ENTRYLK_ERROR_STR "acquiring entrylk after inodelk failed"
+#define DHT_MSG_NO_DISK_USAGE_STATUS_STR "no du stats"
+#define DHT_MSG_LINK_INODE_FAILED_STR "linking inode failed"
+#define DHT_MSG_SELFHEAL_FAILED_STR "Directory selfheal failed"
+#define DHT_MSG_NO_MDS_SUBVOL_STR "No mds subvol"
+#define DHT_MSG_LIST_XATTRS_FAILED_STR "failed to list xattrs"
+#define DHT_MSG_RESET_INTER_XATTR_FAILED_STR "Failed to reset internal xattr"
+#define DHT_MSG_MDS_DOWN_UNABLE_TO_SET_STR \
+ "mds subvol is down, unable to set xattr"
+#define DHT_MSG_DIR_ATTR_HEAL_FAILED_STR \
+ "Directory attr heal failed. Failed to set uid/gid"
+#define DHT_MSG_WIND_UNLOCK_FAILED_STR \
+ "Winding unlock failed: stale locks left on brick"
+#define DHT_MSG_COMMIT_HASH_FAILED_STR "Directory commit hash updaten failed"
+#define DHT_MSG_LK_ARRAY_INFO_STR "lk info"
+#define DHT_MSG_UNLOCK_GFID_FAILED_STR \
+ "unlock failed on gfid: stale lock might be left"
+#define DHT_MSG_UNLOCKING_FAILED_STR "unlocking failed"
+#define DHT_MSG_UNLOCK_FOLLOW_ENTRYLK_STR "not unlocking following entrylks"
+#define DHT_MSG_COPY_FRAME_FAILED_STR "copy frame failed"
+#define DHT_MSG_UNLOCK_FOLLOW_LOCKS_STR "not unlocking following locks"
+#define DHT_MSG_INODELK_FAILED_STR "inodelk failed on subvol"
+#define DHT_MSG_LOCK_FRAME_FAILED_STR "memory allocation failed for lock_frame"
+#define DHT_MSG_LOCAL_LOCK_INIT_FAILED_STR "dht_local_lock_init failed"
+#define DHT_MSG_ENTRYLK_FAILED_AFT_INODELK_STR \
+ "dht_blocking_entrylk failed after taking inodelk"
+#define DHT_MSG_BLOCK_INODELK_FAILED_STR "dht_blocking_inodelk failed"
+#define DHT_MSG_CALLOC_FAILED_STR "calloc failed"
+#define DHT_MSG_LOCK_ALLOC_FAILED_STR "lock allocation failed"
+#define DHT_MSG_ALLOC_FRAME_FAILED_NOT_UNLOCKING_FOLLOWING_ENTRYLKS_STR \
+ "cannot allocate a frame, not unlocking following entrylks"
+#define DHT_MSG_LOCAL_LOCKS_STORE_FAILED_UNLOCKING_FOLLOWING_ENTRYLK_STR \
+ "storing locks in local failed, not unlocking following entrylks"
+#define DHT_MSG_DST_NULL_SET_FAILED_STR \
+ "src or dst is NULL, Failed to set dictionary value"
#endif /* _DHT_MESSAGES_H_ */
diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c
index 41a587823a9..8ba8082bd86 100644
--- a/xlators/cluster/dht/src/dht-rebalance.c
+++ b/xlators/cluster/dht/src/dht-rebalance.c
@@ -9,16 +9,15 @@
*/
#include "dht-common.h"
-#include "xlator.h"
-#include "syscall.h"
-#include <signal.h>
+#include <glusterfs/syscall.h>
#include <fnmatch.h>
#include <signal.h>
-#include "events.h"
+#include <glusterfs/events.h>
+#include "glusterfs/compat-errno.h" // for ENODATA on BSD
#define GF_DISK_SECTOR_SIZE 512
-#define DHT_REBALANCE_PID 4242 /* Change it if required */
-#define DHT_REBALANCE_BLKSIZE (1024 * 1024) /* 1 MB */
+#define DHT_REBALANCE_PID 4242 /* Change it if required */
+#define DHT_REBALANCE_BLKSIZE 1048576 /* 1 MB */
#define MAX_MIGRATE_QUEUE_COUNT 500
#define MIN_MIGRATE_QUEUE_COUNT 200
#define MAX_REBAL_TYPE_SIZE 16
@@ -46,7 +45,10 @@ gf_defrag_free_dir_dfmeta(struct dir_dfmeta *meta, int local_subvols_cnt)
if (meta) {
for (i = 0; i < local_subvols_cnt; i++) {
- gf_dirent_free(&meta->equeue[i]);
+ if (meta->equeue)
+ gf_dirent_free(&meta->equeue[i]);
+ if (meta->lfd && meta->lfd[i])
+ fd_unref(meta->lfd[i]);
}
GF_FREE(meta->equeue);
@@ -54,6 +56,7 @@ gf_defrag_free_dir_dfmeta(struct dir_dfmeta *meta, int local_subvols_cnt)
GF_FREE(meta->iterator);
GF_FREE(meta->offset_var);
GF_FREE(meta->fetch_entries);
+ GF_FREE(meta->lfd);
GF_FREE(meta);
}
}
@@ -85,26 +88,6 @@ dht_set_global_defrag_error(gf_defrag_info_t *defrag, int ret)
return;
}
-static gf_boolean_t
-dht_is_tier_command(int cmd)
-{
- gf_boolean_t is_tier = _gf_false;
-
- switch (cmd) {
- case GF_DEFRAG_CMD_START_TIER:
- case GF_DEFRAG_CMD_STATUS_TIER:
- case GF_DEFRAG_CMD_START_DETACH_TIER:
- case GF_DEFRAG_CMD_STOP_DETACH_TIER:
- case GF_DEFRAG_CMD_PAUSE_TIER:
- case GF_DEFRAG_CMD_RESUME_TIER:
- is_tier = _gf_true;
- break;
- default:
- break;
- }
- return is_tier;
-}
-
static int
dht_send_rebalance_event(xlator_t *this, int cmd, gf_defrag_status_t status)
{
@@ -113,8 +96,6 @@ dht_send_rebalance_event(xlator_t *this, int cmd, gf_defrag_status_t status)
char *tmpstr = NULL;
char *ptr = NULL;
char *suffix = "-dht";
- dht_conf_t *conf = NULL;
- gf_defrag_info_t *defrag = NULL;
int len = 0;
eventtypes_t event = EVENT_LAST;
@@ -133,21 +114,14 @@ dht_send_rebalance_event(xlator_t *this, int cmd, gf_defrag_status_t status)
break;
}
- if (dht_is_tier_command(cmd)) {
- /* We should have the tier volume name*/
- conf = this->private;
- defrag = conf->defrag;
- volname = defrag->tier_conf.volname;
- } else {
- /* DHT volume */
- len = strlen(this->name) - strlen(suffix);
- tmpstr = gf_strdup(this->name);
- if (tmpstr) {
- ptr = tmpstr + len;
- if (!strcmp(ptr, suffix)) {
- tmpstr[len] = '\0';
- volname = tmpstr;
- }
+ /* DHT volume */
+ len = strlen(this->name) - strlen(suffix);
+ tmpstr = gf_strdup(this->name);
+ if (tmpstr) {
+ ptr = tmpstr + len;
+ if (!strcmp(ptr, suffix)) {
+ tmpstr[len] = '\0';
+ volname = tmpstr;
}
}
@@ -173,75 +147,6 @@ dht_strip_out_acls(dict_t *dict)
}
}
-static int
-dht_write_with_holes(xlator_t *to, fd_t *fd, struct iovec *vec, int count,
- int32_t size, off_t offset, struct iobref *iobref,
- int *fop_errno)
-{
- int i = 0;
- int ret = -1;
- int start_idx = 0;
- int tmp_offset = 0;
- int write_needed = 0;
- int buf_len = 0;
- int size_pending = 0;
- char *buf = NULL;
-
- /* loop through each vector */
- for (i = 0; i < count; i++) {
- buf = vec[i].iov_base;
- buf_len = vec[i].iov_len;
-
- for (start_idx = 0; (start_idx + GF_DISK_SECTOR_SIZE) <= buf_len;
- start_idx += GF_DISK_SECTOR_SIZE) {
- if (mem_0filled(buf + start_idx, GF_DISK_SECTOR_SIZE) != 0) {
- write_needed = 1;
- continue;
- }
-
- if (write_needed) {
- ret = syncop_write(
- to, fd, (buf + tmp_offset), (start_idx - tmp_offset),
- (offset + tmp_offset), iobref, 0, NULL, NULL);
- /* 'path' will be logged in calling function */
- if (ret < 0) {
- gf_log(THIS->name, GF_LOG_WARNING, "failed to write (%s)",
- strerror(-ret));
- *fop_errno = -ret;
- ret = -1;
- goto out;
- }
-
- write_needed = 0;
- }
- tmp_offset = start_idx + GF_DISK_SECTOR_SIZE;
- }
-
- if ((start_idx < buf_len) || write_needed) {
- /* This means, last chunk is not yet written.. write it */
- ret = syncop_write(to, fd, (buf + tmp_offset),
- (buf_len - tmp_offset), (offset + tmp_offset),
- iobref, 0, NULL, NULL);
- if (ret < 0) {
- /* 'path' will be logged in calling function */
- gf_log(THIS->name, GF_LOG_WARNING, "failed to write (%s)",
- strerror(-ret));
- *fop_errno = -ret;
- ret = -1;
- goto out;
- }
- }
-
- size_pending = (size - buf_len);
- if (!size_pending)
- break;
- }
-
- ret = size;
-out:
- return ret;
-}
-
/*
return values:
-1 : failure
@@ -649,7 +554,7 @@ out:
static int
__dht_rebalance_create_dst_file(xlator_t *this, xlator_t *to, xlator_t *from,
loc_t *loc, struct iatt *stbuf, fd_t **dst_fd,
- int *fop_errno)
+ int *fop_errno, int file_has_holes)
{
int ret = -1;
int ret2 = -1;
@@ -704,26 +609,23 @@ __dht_rebalance_create_dst_file(xlator_t *this, xlator_t *to, xlator_t *from,
goto out;
}
- if (!!dht_is_tier_xlator(this)) {
- xdata = dict_new();
- if (!xdata) {
- *fop_errno = ENOMEM;
- ret = -1;
- gf_msg(this->name, GF_LOG_ERROR, ENOMEM,
- DHT_MSG_MIGRATE_FILE_FAILED, "%s: dict_new failed)",
- loc->path);
- goto out;
- }
+ xdata = dict_new();
+ if (!xdata) {
+ *fop_errno = ENOMEM;
+ ret = -1;
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_MIGRATE_FILE_FAILED,
+ "%s: dict_new failed)", loc->path);
+ goto out;
+ }
- ret = dict_set_int32(xdata, GF_CLEAN_WRITE_PROTECTION, 1);
- if (ret) {
- *fop_errno = ENOMEM;
- ret = -1;
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED,
- "%s: failed to set dictionary value: key = %s ", loc->path,
- GF_CLEAN_WRITE_PROTECTION);
- goto out;
- }
+ ret = dict_set_int32_sizen(xdata, GF_CLEAN_WRITE_PROTECTION, 1);
+ if (ret) {
+ *fop_errno = ENOMEM;
+ ret = -1;
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED,
+ "%s: failed to set dictionary value: key = %s ", loc->path,
+ GF_CLEAN_WRITE_PROTECTION);
+ goto out;
}
ret = syncop_lookup(to, loc, &new_stbuf, NULL, xdata, NULL);
@@ -818,7 +720,7 @@ __dht_rebalance_create_dst_file(xlator_t *this, xlator_t *to, xlator_t *from,
/* No need to bother about 0 byte size files */
if (stbuf->ia_size > 0) {
- if (conf->use_fallocate) {
+ if (conf->use_fallocate && !file_has_holes) {
ret = syncop_fallocate(to, fd, 0, 0, stbuf->ia_size, NULL, NULL);
if (ret < 0) {
if (ret == -EOPNOTSUPP || ret == -EINVAL || ret == -ENOSYS) {
@@ -845,9 +747,7 @@ __dht_rebalance_create_dst_file(xlator_t *this, xlator_t *to, xlator_t *from,
goto out;
}
}
- }
-
- if (!conf->use_fallocate) {
+ } else {
ret = syncop_ftruncate(to, fd, stbuf->ia_size, NULL, NULL, NULL,
NULL);
if (ret < 0) {
@@ -875,7 +775,7 @@ out:
dict_unref(dict);
if (xdata)
- dict_unref(dict);
+ dict_unref(xdata);
return ret;
}
@@ -1098,32 +998,103 @@ __dht_rebalance_migrate_data(xlator_t *this, gf_defrag_info_t *defrag,
int ret = 0;
int count = 0;
off_t offset = 0;
+ off_t data_offset = 0;
+ off_t hole_offset = 0;
struct iovec *vector = NULL;
struct iobref *iobref = NULL;
uint64_t total = 0;
size_t read_size = 0;
+ size_t data_block_size = 0;
dict_t *xdata = NULL;
dht_conf_t *conf = NULL;
conf = this->private;
+
/* if file size is '0', no need to enter this loop */
while (total < ia_size) {
- read_size = (((ia_size - total) > DHT_REBALANCE_BLKSIZE)
- ? DHT_REBALANCE_BLKSIZE
- : (ia_size - total));
+ /* This is a regular file - read it sequentially */
+ if (!hole_exists) {
+ read_size = (((ia_size - total) > DHT_REBALANCE_BLKSIZE)
+ ? DHT_REBALANCE_BLKSIZE
+ : (ia_size - total));
+ } else {
+ /* This is a sparse file - read only the data segments in the file
+ */
+
+ /* If the previous data block is fully copied, find the next data
+ * segment, starting at the offset of the last read and written
+ * byte. */
+ if (data_block_size <= 0) {
+ ret = syncop_seek(from, src, offset, GF_SEEK_DATA, NULL,
+ &data_offset);
+ if (ret) {
+ if (ret == -ENXIO)
+ ret = 0; /* No more data segments */
+ else
+ *fop_errno = -ret; /* Error occurred */
+
+ break;
+ }
+
+ /* If the position of the current data segment is greater than
+ * the position of the next hole, find the next hole in order to
+ * calculate the length of the new data segment */
+ if (data_offset > hole_offset) {
+ /* Starting at the offset of the last data segment, find the
+ * next hole */
+ ret = syncop_seek(from, src, data_offset, GF_SEEK_HOLE,
+ NULL, &hole_offset);
+ if (ret) {
+ /* If an error occurred here it's a real error because
+ * if the seek for a data segment was successful then
+ * necessarily another hole must exist (EOF is a hole)
+ */
+ *fop_errno = -ret;
+ break;
+ }
+
+ /* Calculate the total size of the current data block */
+ data_block_size = hole_offset - data_offset;
+ }
+ } else {
+ /* There is still data in the current segment, move the
+ * data_offset to the position of the last written byte */
+ data_offset = offset;
+ }
+
+ /* Calculate how much data needs to be read and written. If the data
+ * segment's length is bigger than DHT_REBALANCE_BLKSIZE, read and
+ * write DHT_REBALANCE_BLKSIZE data length and the rest in the
+ * next iteration(s) */
+ read_size = ((data_block_size > DHT_REBALANCE_BLKSIZE)
+ ? DHT_REBALANCE_BLKSIZE
+ : data_block_size);
+
+ /* Calculate the remaining size of the data block - maybe there's no
+ * need to seek for data in the next iteration */
+ data_block_size -= read_size;
+
+ /* Set offset to the offset of the data segment so read and write
+ * will have the correct position */
+ offset = data_offset;
+ }
ret = syncop_readv(from, src, read_size, offset, 0, &vector, &count,
&iobref, NULL, NULL, NULL);
+
if (!ret || (ret < 0)) {
- *fop_errno = -ret;
+ if (!ret) {
+ /* File was probably truncated */
+ ret = -1;
+ *fop_errno = ENOSPC;
+ } else {
+ *fop_errno = -ret;
+ }
break;
}
- if (hole_exists) {
- ret = dht_write_with_holes(to, dst, vector, count, ret, offset,
- iobref, fop_errno);
- } else {
- if (!conf->force_migration && !dht_is_tier_xlator(this)) {
+ if (!conf->force_migration) {
+ if (!xdata) {
xdata = dict_new();
if (!xdata) {
gf_msg("dht", GF_LOG_ERROR, 0, DHT_MSG_MIGRATE_FILE_FAILED,
@@ -1143,7 +1114,7 @@ __dht_rebalance_migrate_data(xlator_t *this, gf_defrag_info_t *defrag,
* https://github.com/gluster/glusterfs/issues/308
* for more details.
*/
- ret = dict_set_int32(xdata, GF_AVOID_OVERWRITE, 1);
+ ret = dict_set_int32_sizen(xdata, GF_AVOID_OVERWRITE, 1);
if (ret) {
gf_msg("dht", GF_LOG_ERROR, 0, ENOMEM,
"failed to set dict");
@@ -1152,22 +1123,12 @@ __dht_rebalance_migrate_data(xlator_t *this, gf_defrag_info_t *defrag,
break;
}
}
-
- ret = syncop_writev(to, dst, vector, count, offset, iobref, 0, NULL,
- NULL, xdata, NULL);
- if (ret < 0) {
- *fop_errno = -ret;
- }
- }
-
- if ((defrag && defrag->cmd == GF_DEFRAG_CMD_START_TIER) &&
- (gf_defrag_get_pause_state(&defrag->tier_conf) != TIER_RUNNING)) {
- gf_msg("tier", GF_LOG_INFO, 0, DHT_MSG_TIER_PAUSED,
- "Migrate file paused");
- ret = -1;
}
+ ret = syncop_writev(to, dst, vector, count, offset, iobref, 0, NULL,
+ NULL, xdata, NULL);
if (ret < 0) {
+ *fop_errno = -ret;
break;
}
@@ -1561,6 +1522,7 @@ dht_migrate_file(xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to,
xlator_t *old_target = NULL;
xlator_t *hashed_subvol = NULL;
fd_t *linkto_fd = NULL;
+ dict_t *xdata = NULL;
if (from == to) {
gf_msg_debug(this->name, 0,
@@ -1571,20 +1533,6 @@ dht_migrate_file(xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to,
goto out;
}
- /* If defrag is NULL, it should be assumed that migration is triggered
- * from client */
- defrag = conf->defrag;
-
- /* migration of files from clients is restricted to non-tiered clients
- * for now */
- if (!defrag && dht_is_tier_xlator(this)) {
- ret = ENOTSUP;
- goto out;
- }
-
- if (defrag && defrag->tier_conf.is_tier)
- log_level = GF_LOG_TRACE;
-
gf_log(this->name, log_level, "%s: attempting to move from %s to %s",
loc->path, from->name, to->name);
@@ -1627,6 +1575,10 @@ dht_migrate_file(xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to,
loc->path);
}
+ /* The file is locked to prevent a rename during a migration. Renames
+ * and migrations on the file at the same time can lead to data loss.
+ */
+
ret = dht_build_parent_loc(this, &parent_loc, loc, fop_errno);
if (ret < 0) {
ret = -1;
@@ -1727,9 +1679,13 @@ dht_migrate_file(xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to,
goto out;
}
+ /* Try to preserve 'holes' while migrating data */
+ if (stbuf.ia_size > (stbuf.ia_blocks * GF_DISK_SECTOR_SIZE))
+ file_has_holes = 1;
+
/* create the destination, with required modes/xattr */
ret = __dht_rebalance_create_dst_file(this, to, from, loc, &stbuf, &dst_fd,
- fop_errno);
+ fop_errno, file_has_holes);
if (ret) {
gf_msg(this->name, GF_LOG_ERROR, 0, 0,
"Create dst failed"
@@ -1773,8 +1729,8 @@ dht_migrate_file(xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to,
* destination. We need to do update this only post migration
* as in case of failure the linkto needs to point to the source
* subvol */
- ret = __dht_rebalance_create_dst_file(this, to, from, loc, &stbuf,
- &dst_fd, fop_errno);
+ ret = __dht_rebalance_create_dst_file(
+ this, to, from, loc, &stbuf, &dst_fd, fop_errno, file_has_holes);
if (ret) {
gf_log(this->name, GF_LOG_ERROR,
"Create dst failed"
@@ -1861,9 +1817,6 @@ dht_migrate_file(xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to,
ret = 0;
goto out;
}
- /* Try to preserve 'holes' while migrating data */
- if (stbuf.ia_size > (stbuf.ia_blocks * GF_DISK_SECTOR_SIZE))
- file_has_holes = 1;
ret = __dht_rebalance_migrate_data(this, defrag, from, to, src_fd, dst_fd,
stbuf.ia_size, file_has_holes,
@@ -1878,7 +1831,15 @@ dht_migrate_file(xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to,
/* TODO: Sync the locks */
- ret = syncop_fsync(to, dst_fd, 0, NULL, NULL, NULL, NULL);
+ xdata = dict_new();
+ if (!xdata || dict_set_int8(xdata, "last-fsync", 1)) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "%s: failed to set last-fsync flag on "
+ "%s (%s)",
+ loc->path, to->name, strerror(ENOMEM));
+ }
+
+ ret = syncop_fsync(to, dst_fd, 0, NULL, NULL, xdata, NULL);
if (ret) {
gf_log(this->name, GF_LOG_WARNING, "%s: failed to fsync on %s (%s)",
loc->path, to->name, strerror(-ret));
@@ -2321,14 +2282,12 @@ out:
}
}
- if (!dht_is_tier_xlator(this)) {
- lk_ret = syncop_removexattr(to, loc, GF_PROTECT_FROM_EXTERNAL_WRITES,
- NULL, NULL);
- if (lk_ret && (lk_ret != -ENODATA) && (lk_ret != -ENOATTR)) {
- gf_msg(this->name, GF_LOG_WARNING, -lk_ret, 0,
- "%s: removexattr failed key %s", loc->path,
- GF_PROTECT_FROM_EXTERNAL_WRITES);
- }
+ lk_ret = syncop_removexattr(to, loc, GF_PROTECT_FROM_EXTERNAL_WRITES, NULL,
+ NULL);
+ if (lk_ret && (lk_ret != -ENODATA) && (lk_ret != -ENOATTR)) {
+ gf_msg(this->name, GF_LOG_WARNING, -lk_ret, 0,
+ "%s: removexattr failed key %s", loc->path,
+ GF_PROTECT_FROM_EXTERNAL_WRITES);
}
if (dict)
@@ -2341,11 +2300,15 @@ out:
if (dst_fd)
syncop_close(dst_fd);
+
if (src_fd)
syncop_close(src_fd);
if (linkto_fd)
syncop_close(linkto_fd);
+ if (xdata)
+ dict_unref(xdata);
+
loc_wipe(&tmp_loc);
loc_wipe(&parent_loc);
@@ -2440,15 +2403,12 @@ void
dht_build_root_inode(xlator_t *this, inode_t **inode)
{
inode_table_t *itable = NULL;
- uuid_t root_gfid = {
- 0,
- };
+ static uuid_t root_gfid = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1};
itable = inode_table_new(0, this);
if (!itable)
return;
- root_gfid[15] = 1;
*inode = inode_find(itable, root_gfid);
}
@@ -2578,10 +2538,10 @@ out:
* all hardlinks.
*/
-int
+gf_boolean_t
gf_defrag_should_i_migrate(xlator_t *this, int local_subvol_index, uuid_t gfid)
{
- int ret = 0;
+ gf_boolean_t ret = _gf_false;
int i = local_subvol_index;
char *str = NULL;
uint32_t hashval = 0;
@@ -2603,12 +2563,11 @@ gf_defrag_should_i_migrate(xlator_t *this, int local_subvol_index, uuid_t gfid)
}
str = uuid_utoa_r(gfid, buf);
- ret = dht_hash_compute(this, 0, str, &hashval);
- if (ret == 0) {
+ if (dht_hash_compute(this, 0, str, &hashval) == 0) {
index = (hashval % entry->count);
if (entry->elements[index].info == REBAL_NODEUUID_MINE) {
/* Index matches this node's nodeuuid.*/
- ret = 1;
+ ret = _gf_true;
goto out;
}
@@ -2621,12 +2580,12 @@ gf_defrag_should_i_migrate(xlator_t *this, int local_subvol_index, uuid_t gfid)
/* None of the bricks in the subvol are up.
* CHILD_DOWN will kill the process soon */
- return 0;
+ return _gf_false;
}
if (entry->elements[index].info == REBAL_NODEUUID_MINE) {
/* Index matches this node's nodeuuid.*/
- ret = 1;
+ ret = _gf_true;
goto out;
}
}
@@ -2675,6 +2634,7 @@ gf_defrag_migrate_single_file(void *opaque)
struct iatt *iatt_ptr = NULL;
gf_boolean_t update_skippedcount = _gf_true;
int i = 0;
+ gf_boolean_t should_i_migrate = 0;
rebal_entry = (struct dht_container *)opaque;
if (!rebal_entry) {
@@ -2729,17 +2689,29 @@ gf_defrag_migrate_single_file(void *opaque)
goto out;
}
- if (!gf_defrag_should_i_migrate(this, rebal_entry->local_subvol_index,
- entry->d_stat.ia_gfid)) {
- gf_msg_debug(this->name, 0, "Don't migrate %s ", entry_loc.path);
- goto out;
- }
+ should_i_migrate = gf_defrag_should_i_migrate(
+ this, rebal_entry->local_subvol_index, entry->d_stat.ia_gfid);
gf_uuid_copy(entry_loc.gfid, entry->d_stat.ia_gfid);
gf_uuid_copy(entry_loc.pargfid, loc->gfid);
ret = syncop_lookup(this, &entry_loc, &iatt, NULL, NULL, NULL);
+
+ if (!should_i_migrate) {
+ /* This node isn't supposed to migrate the file. Suppress any
+ * potential error from lookup, as this file is under migration by
+ * another node. */
+ if (ret) {
+ gf_msg_debug(this->name, -ret,
+ "Ignoring lookup failure: node isn't migrating %s",
+ entry_loc.path);
+ ret = 0;
+ }
+ gf_msg_debug(this->name, 0, "Don't migrate %s ", entry_loc.path);
+ goto out;
+ }
+
if (ret) {
gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_MIGRATE_FILE_FAILED,
"Migrate file failed: %s lookup failed", entry_loc.path);
@@ -2902,8 +2874,7 @@ gf_defrag_migrate_single_file(void *opaque)
if (defrag->stats == _gf_true) {
gettimeofday(&end, NULL);
- elapsed = (end.tv_sec - start.tv_sec) * 1e6 +
- (end.tv_usec - start.tv_usec);
+ elapsed = gf_tvdiff(&start, &end);
gf_log(this->name, GF_LOG_INFO,
"Migration of "
"file:%s size:%" PRIu64
@@ -3082,9 +3053,9 @@ int static gf_defrag_get_entry(xlator_t *this, int i,
dht_conf_t *conf, gf_defrag_info_t *defrag,
fd_t *fd, dict_t *migrate_data,
struct dir_dfmeta *dir_dfmeta, dict_t *xattr_req,
- int *should_commit_hash, int *perrno)
+ int *perrno)
{
- int ret = -1;
+ int ret = 0;
char is_linkfile = 0;
gf_dirent_t *df_entry = NULL;
struct dht_container *tmp_container = NULL;
@@ -3100,6 +3071,13 @@ int static gf_defrag_get_entry(xlator_t *this, int i,
}
if (dir_dfmeta->fetch_entries[i] == 1) {
+ if (!fd) {
+ dir_dfmeta->fetch_entries[i] = 0;
+ dir_dfmeta->offset_var[i].readdir_done = 1;
+ ret = 0;
+ goto out;
+ }
+
ret = syncop_readdirp(conf->local_subvols[i], fd, 131072,
dir_dfmeta->offset_var[i].offset,
&(dir_dfmeta->equeue[i]), xattr_req, NULL);
@@ -3259,7 +3237,6 @@ gf_defrag_process_dir(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
dict_t *migrate_data, int *perrno)
{
int ret = -1;
- fd_t *fd = NULL;
dht_conf_t *conf = NULL;
gf_dirent_t entries;
dict_t *xattr_req = NULL;
@@ -3280,7 +3257,7 @@ gf_defrag_process_dir(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
int dfc_index = 0;
int throttle_up = 0;
struct dir_dfmeta *dir_dfmeta = NULL;
- int should_commit_hash = 1;
+ xlator_t *old_THIS = NULL;
gf_log(this->name, GF_LOG_INFO, "migrate data called on %s", loc->path);
gettimeofday(&dir_start, NULL);
@@ -3293,28 +3270,53 @@ gf_defrag_process_dir(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
goto out;
}
- fd = fd_create(loc->inode, defrag->pid);
- if (!fd) {
- gf_log(this->name, GF_LOG_ERROR, "Failed to create fd");
+ old_THIS = THIS;
+ THIS = this;
+
+ dir_dfmeta = GF_CALLOC(1, sizeof(*dir_dfmeta), gf_common_mt_pointer);
+ if (!dir_dfmeta) {
+ gf_log(this->name, GF_LOG_ERROR, "dir_dfmeta is NULL");
ret = -1;
goto out;
}
- ret = syncop_opendir(this, loc, fd, NULL, NULL);
- if (ret) {
- gf_msg(this->name, GF_LOG_WARNING, -ret, DHT_MSG_MIGRATE_DATA_FAILED,
- "Migrate data failed: Failed to open dir %s", loc->path);
- *perrno = -ret;
+ dir_dfmeta->lfd = GF_CALLOC(local_subvols_cnt, sizeof(fd_t *),
+ gf_common_mt_pointer);
+ if (!dir_dfmeta->lfd) {
+ gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_INSUFF_MEMORY,
+ "for dir_dfmeta", NULL);
ret = -1;
+ *perrno = ENOMEM;
goto out;
}
- fd_bind(fd);
- dir_dfmeta = GF_CALLOC(1, sizeof(*dir_dfmeta), gf_common_mt_pointer);
- if (!dir_dfmeta) {
- gf_log(this->name, GF_LOG_ERROR, "dir_dfmeta is NULL");
- ret = -1;
- goto out;
+ for (i = 0; i < local_subvols_cnt; i++) {
+ dir_dfmeta->lfd[i] = fd_create(loc->inode, defrag->pid);
+ if (!dir_dfmeta->lfd[i]) {
+ gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_FD_CREATE_FAILED,
+ NULL);
+ *perrno = ENOMEM;
+ ret = -1;
+ goto out;
+ }
+
+ ret = syncop_opendir(conf->local_subvols[i], loc, dir_dfmeta->lfd[i],
+ NULL, NULL);
+ if (ret) {
+ fd_unref(dir_dfmeta->lfd[i]);
+ dir_dfmeta->lfd[i] = NULL;
+ gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_FAILED_TO_OPEN,
+ "dir: %s", loc->path, "subvol: %s",
+ conf->local_subvols[i]->name, NULL);
+
+ if (conf->decommission_in_progress) {
+ *perrno = -ret;
+ ret = -1;
+ goto out;
+ }
+ } else {
+ fd_bind(dir_dfmeta->lfd[i]);
+ }
}
dir_dfmeta->head = GF_CALLOC(local_subvols_cnt, sizeof(*(dir_dfmeta->head)),
@@ -3349,6 +3351,7 @@ gf_defrag_process_dir(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
ret = -1;
goto out;
}
+
ret = gf_defrag_ctx_subvols_init(dir_dfmeta->offset_var, this);
if (ret) {
gf_log(this->name, GF_LOG_ERROR,
@@ -3361,7 +3364,8 @@ gf_defrag_process_dir(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
dir_dfmeta->fetch_entries = GF_CALLOC(local_subvols_cnt, sizeof(int),
gf_common_mt_int);
if (!dir_dfmeta->fetch_entries) {
- gf_log(this->name, GF_LOG_ERROR, "dir_dfmeta->fetch_entries is NULL");
+ gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_INSUFF_MEMORY,
+ "for dir_dfmeta->fetch_entries", NULL);
ret = -1;
goto out;
}
@@ -3431,8 +3435,13 @@ gf_defrag_process_dir(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
ldfq_count <= MAX_MIGRATE_QUEUE_COUNT &&
!dht_dfreaddirp_done(dir_dfmeta->offset_var, local_subvols_cnt)) {
ret = gf_defrag_get_entry(this, dfc_index, &container, loc, conf,
- defrag, fd, migrate_data, dir_dfmeta,
- xattr_req, &should_commit_hash, perrno);
+ defrag, dir_dfmeta->lfd[dfc_index],
+ migrate_data, dir_dfmeta, xattr_req,
+ perrno);
+
+ if (defrag->defrag_status == GF_DEFRAG_STATUS_STOPPED) {
+ goto out;
+ }
if (ret) {
gf_log(this->name, GF_LOG_WARNING,
@@ -3472,27 +3481,19 @@ gf_defrag_process_dir(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
}
gettimeofday(&end, NULL);
- elapsed = (end.tv_sec - dir_start.tv_sec) * 1e6 +
- (end.tv_usec - dir_start.tv_usec);
+ elapsed = gf_tvdiff(&dir_start, &end);
gf_log(this->name, GF_LOG_INFO,
"Migration operation on dir %s took "
"%.2f secs",
loc->path, elapsed / 1e6);
ret = 0;
out:
-
+ THIS = old_THIS;
gf_defrag_free_dir_dfmeta(dir_dfmeta, local_subvols_cnt);
if (xattr_req)
dict_unref(xattr_req);
- if (fd)
- fd_unref(fd);
-
- if (ret == 0 && should_commit_hash == 0) {
- ret = 2;
- }
-
/* It does not matter if it errored out - this number is
* used to calculate rebalance estimated time to complete.
* No locking required as dirs are processed by a single thread.
@@ -3500,6 +3501,7 @@ out:
defrag->num_dirs_processed++;
return ret;
}
+
int
gf_defrag_settle_hash(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
dict_t *fix_layout)
@@ -3514,7 +3516,6 @@ gf_defrag_settle_hash(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
* rebalance is complete.
*/
if (defrag->cmd == GF_DEFRAG_CMD_START_LAYOUT_FIX ||
- defrag->cmd == GF_DEFRAG_CMD_START_DETACH_TIER ||
defrag->cmd == GF_DEFRAG_CMD_DETACH_START) {
return 0;
}
@@ -3560,114 +3561,6 @@ gf_defrag_settle_hash(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
return 0;
}
-/* Function for doing a named lookup on file inodes during an attach tier
- * So that a hardlink lookup heal i.e gfid to parent gfid lookup heal
- * happens on pre-existing data. This is required so that the ctr database has
- * hardlinks of all the exisitng file in the volume. CTR xlator on the
- * brick/server side does db update/insert of the hardlink on a namelookup.
- * Currently the namedlookup is done synchronous to the fixlayout that is
- * triggered by attach tier. This is not performant, adding more time to
- * fixlayout. The performant approach is record the hardlinks on a compressed
- * datastore and then do the namelookup asynchronously later, giving the ctr db
- * eventual consistency
- * */
-int
-gf_fix_layout_tier_attach_lookup(xlator_t *this, loc_t *parent_loc,
- gf_dirent_t *file_dentry)
-{
- int ret = -1;
- dict_t *lookup_xdata = NULL;
- dht_conf_t *conf = NULL;
- loc_t file_loc = {
- 0,
- };
- struct iatt iatt = {
- 0,
- };
-
- GF_VALIDATE_OR_GOTO("tier", this, out);
-
- GF_VALIDATE_OR_GOTO(this->name, parent_loc, out);
-
- GF_VALIDATE_OR_GOTO(this->name, file_dentry, out);
-
- GF_VALIDATE_OR_GOTO(this->name, this->private, out);
-
- if (!parent_loc->inode) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "%s/%s parent is NULL", parent_loc->path, file_dentry->d_name);
- goto out;
- }
-
- conf = this->private;
-
- loc_wipe(&file_loc);
-
- if (gf_uuid_is_null(file_dentry->d_stat.ia_gfid)) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "%s/%s gfid not present", parent_loc->path, file_dentry->d_name);
- goto out;
- }
-
- gf_uuid_copy(file_loc.gfid, file_dentry->d_stat.ia_gfid);
-
- if (gf_uuid_is_null(parent_loc->gfid)) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "%s/%s"
- " gfid not present",
- parent_loc->path, file_dentry->d_name);
- goto out;
- }
-
- gf_uuid_copy(file_loc.pargfid, parent_loc->gfid);
-
- ret = dht_build_child_loc(this, &file_loc, parent_loc, file_dentry->d_name);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "Child loc build failed");
- ret = -1;
- goto out;
- }
-
- lookup_xdata = dict_new();
- if (!lookup_xdata) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "Failed creating lookup dict for %s", file_dentry->d_name);
- goto out;
- }
-
- ret = dict_set_int32(lookup_xdata, CTR_ATTACH_TIER_LOOKUP, 1);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "Failed to set lookup flag");
- goto out;
- }
-
- gf_uuid_copy(file_loc.parent->gfid, parent_loc->gfid);
-
- /* Sending lookup to cold tier only */
- ret = syncop_lookup(conf->subvolumes[0], &file_loc, &iatt, NULL,
- lookup_xdata, NULL);
- if (ret) {
- /* If the file does not exist on the cold tier than it must */
- /* have been discovered on the hot tier. This is not an error. */
- gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS,
- "%s lookup to cold tier on attach heal failed", file_loc.path);
- goto out;
- }
-
- ret = 0;
-
-out:
-
- loc_wipe(&file_loc);
-
- if (lookup_xdata)
- dict_unref(lookup_xdata);
-
- return ret;
-}
-
int
gf_defrag_fix_layout(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
dict_t *fix_layout, dict_t *migrate_data)
@@ -3687,7 +3580,6 @@ gf_defrag_fix_layout(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
};
inode_t *linked_inode = NULL, *inode = NULL;
dht_conf_t *conf = NULL;
- int should_commit_hash = 1;
int perrno = 0;
conf = this->private;
@@ -3790,16 +3682,6 @@ gf_defrag_fix_layout(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
if (!strcmp(entry->d_name, ".") || !strcmp(entry->d_name, ".."))
continue;
if (!IA_ISDIR(entry->d_stat.ia_type)) {
- /* If its a fix layout during the attach
- * tier operation do lookups on files
- * on cold subvolume so that there is a
- * CTR DB Lookup Heal triggered on existing
- * data.
- * */
- if (defrag->cmd == GF_DEFRAG_CMD_START_TIER) {
- gf_fix_layout_tier_attach_lookup(this, loc, entry);
- }
-
continue;
}
loc_wipe(&entry_loc);
@@ -3816,8 +3698,6 @@ gf_defrag_fix_layout(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
goto out;
} else {
- should_commit_hash = 0;
-
continue;
}
}
@@ -3880,7 +3760,6 @@ gf_defrag_fix_layout(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
ret = -1;
goto out;
} else {
- should_commit_hash = 0;
continue;
}
}
@@ -3893,7 +3772,12 @@ gf_defrag_fix_layout(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
ret = gf_defrag_fix_layout(this, defrag, &entry_loc, fix_layout,
migrate_data);
- if (ret && ret != 2) {
+ if (defrag->defrag_status == GF_DEFRAG_STATUS_STOPPED ||
+ defrag->defrag_status == GF_DEFRAG_STATUS_FAILED) {
+ goto out;
+ }
+
+ if (ret) {
gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LAYOUT_FIX_FAILED,
"Fix layout failed for %s", entry_loc.path);
@@ -3916,7 +3800,25 @@ gf_defrag_fix_layout(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
INIT_LIST_HEAD(&entries.list);
}
+ /* A directory layout is fixed only after its subdirs are healed to
+ * any newly added bricks. If the layout is fixed before subdirs are
+ * healed, the newly added brick will get a non-null layout.
+ * Any subdirs which hash to that layout will no longer show up
+ * in a directory listing until they are healed.
+ */
+
ret = syncop_setxattr(this, loc, fix_layout, 0, NULL, NULL);
+
+ /* In case of a race where the directory is deleted just before
+ * layout setxattr, the errors are updated in the layout structure.
+ * We can use this information to make a decision whether the directory
+ * is deleted entirely.
+ */
+ if (ret == 0) {
+ ret = dht_dir_layout_error_check(this, loc->inode);
+ ret = -ret;
+ }
+
if (ret) {
if (-ret == ENOENT || -ret == ESTALE) {
gf_msg(this->name, GF_LOG_INFO, -ret, DHT_MSG_LAYOUT_FIX_FAILED,
@@ -3927,6 +3829,7 @@ gf_defrag_fix_layout(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
defrag->total_failures++;
}
ret = 0;
+ goto out;
} else {
gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_LAYOUT_FIX_FAILED,
"Setxattr failed for %s", loc->path);
@@ -3941,11 +3844,10 @@ gf_defrag_fix_layout(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
}
}
- if ((defrag->cmd != GF_DEFRAG_CMD_START_TIER) &&
- (defrag->cmd != GF_DEFRAG_CMD_START_LAYOUT_FIX)) {
+ if (defrag->cmd != GF_DEFRAG_CMD_START_LAYOUT_FIX) {
ret = gf_defrag_process_dir(this, defrag, loc, migrate_data, &perrno);
- if (ret && (ret != 2)) {
+ if (ret) {
if (perrno == ENOENT || perrno == ESTALE) {
ret = 0;
goto out;
@@ -3961,18 +3863,13 @@ gf_defrag_fix_layout(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
if (conf->decommission_in_progress) {
goto out;
}
-
- should_commit_hash = 0;
}
- } else if (ret == 2) {
- should_commit_hash = 0;
}
}
gf_msg_trace(this->name, 0, "fix layout called on %s", loc->path);
- if (should_commit_hash &&
- gf_defrag_settle_hash(this, defrag, loc, fix_layout) != 0) {
+ if (gf_defrag_settle_hash(this, defrag, loc, fix_layout) != 0) {
defrag->total_failures++;
gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_SETTLE_HASH_FAILED,
@@ -3996,245 +3893,34 @@ out:
if (fd)
fd_unref(fd);
- if (ret == 0 && should_commit_hash == 0) {
- ret = 2;
- }
-
- return ret;
-}
-
-/******************************************************************************
- * Tier background Fix layout functions
- ******************************************************************************/
-/* This is the background tier fixlayout thread */
-void *
-gf_tier_do_fix_layout(void *args)
-{
- gf_tier_fix_layout_arg_t *tier_fix_layout_arg = args;
- int ret = -1;
- xlator_t *this = NULL;
- dht_conf_t *conf = NULL;
- gf_defrag_info_t *defrag = NULL;
- dict_t *dict = NULL;
- loc_t loc = {
- 0,
- };
- struct iatt iatt = {
- 0,
- };
- struct iatt parent = {
- 0,
- };
-
- GF_VALIDATE_OR_GOTO("tier", tier_fix_layout_arg, out);
- GF_VALIDATE_OR_GOTO("tier", tier_fix_layout_arg->this, out);
- this = tier_fix_layout_arg->this;
-
- conf = this->private;
- GF_VALIDATE_OR_GOTO(this->name, conf, out);
-
- defrag = conf->defrag;
- GF_VALIDATE_OR_GOTO(this->name, defrag, out);
- GF_VALIDATE_OR_GOTO(this->name, defrag->root_inode, out);
-
- GF_VALIDATE_OR_GOTO(this->name, tier_fix_layout_arg->fix_layout, out);
-
- /* Get Root loc_t */
- dht_build_root_loc(defrag->root_inode, &loc);
- ret = syncop_lookup(this, &loc, &iatt, &parent, NULL, NULL);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_REBALANCE_START_FAILED,
- "Lookup on root failed.");
- ret = -1;
- goto out;
- }
-
- /* Start the crawl */
- gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS,
- "Tiering Fixlayout started");
-
- ret = gf_defrag_fix_layout(this, defrag, &loc,
- tier_fix_layout_arg->fix_layout, NULL);
- if (ret && ret != 2) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_REBALANCE_FAILED,
- "Tiering fixlayout failed.");
- ret = -1;
- goto out;
- }
-
- if (ret != 2 &&
- gf_defrag_settle_hash(this, defrag, &loc,
- tier_fix_layout_arg->fix_layout) != 0) {
- defrag->total_failures++;
- ret = -1;
- goto out;
- }
-
- dict = dict_new();
- if (!dict) {
- ret = -1;
- goto out;
- }
-
- ret = dict_set_str(dict, GF_XATTR_TIER_LAYOUT_FIXED_KEY, "yes");
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_REBALANCE_FAILED,
- "Failed to set dictionary value: key = %s",
- GF_XATTR_TIER_LAYOUT_FIXED_KEY);
- ret = -1;
- goto out;
- }
-
- /* Marking the completion of tiering fix layout via a xattr on root */
- ret = syncop_setxattr(this, &loc, dict, 0, NULL, NULL);
- if (ret) {
- gf_log(this->name, GF_LOG_ERROR,
- "Failed to set tiering fix "
- "layout completed xattr on %s",
- loc.path);
- ret = -1;
- goto out;
- }
-
- ret = 0;
-out:
- if (ret && defrag)
- defrag->total_failures++;
-
- if (dict)
- dict_unref(dict);
-
- return NULL;
-}
-
-int
-gf_tier_start_fix_layout(xlator_t *this, loc_t *loc, gf_defrag_info_t *defrag,
- dict_t *fix_layout)
-{
- int ret = -1;
- dict_t *tier_dict = NULL;
- gf_tier_fix_layout_arg_t *tier_fix_layout_arg = NULL;
-
- tier_dict = dict_new();
- if (!tier_dict) {
- gf_log("tier", GF_LOG_ERROR,
- "Tier fix layout failed :"
- "Creation of tier_dict failed");
- ret = -1;
- goto out;
- }
-
- /* Check if layout is fixed already */
- ret = syncop_getxattr(this, loc, &tier_dict, GF_XATTR_TIER_LAYOUT_FIXED_KEY,
- NULL, NULL);
- if (ret != 0) {
- tier_fix_layout_arg = &defrag->tier_conf.tier_fix_layout_arg;
-
- /*Fill crawl arguments */
- tier_fix_layout_arg->this = this;
- tier_fix_layout_arg->fix_layout = fix_layout;
-
- /* Spawn the fix layout thread so that its done in the
- * background */
- ret = gf_thread_create(&tier_fix_layout_arg->thread_id, NULL,
- gf_tier_do_fix_layout, tier_fix_layout_arg,
- "tierfixl");
- if (ret) {
- gf_log("tier", GF_LOG_ERROR,
- "Thread creation failed. "
- "Background fix layout for tiering will not "
- "work.");
- defrag->total_failures++;
- goto out;
- }
- }
- ret = 0;
-out:
- if (tier_dict)
- dict_unref(tier_dict);
-
return ret;
}
-void
-gf_tier_clear_fix_layout(xlator_t *this, loc_t *loc, gf_defrag_info_t *defrag)
-{
- int ret = -1;
- dict_t *dict = NULL;
-
- GF_VALIDATE_OR_GOTO("tier", this, out);
- GF_VALIDATE_OR_GOTO(this->name, loc, out);
- GF_VALIDATE_OR_GOTO(this->name, defrag, out);
-
- /* Check if background fixlayout is completed. This is not
- * multi-process safe i.e there is a possibility that by the time
- * we move to remove the xattr there it might have been cleared by some
- * other detach process from other node. We ignore the error if such
- * a thing happens */
- ret = syncop_getxattr(this, loc, &dict, GF_XATTR_TIER_LAYOUT_FIXED_KEY,
- NULL, NULL);
- if (ret) {
- /* Background fixlayout not complete - nothing to clear*/
- gf_msg(this->name, GF_LOG_WARNING, -ret, DHT_MSG_LOG_TIER_STATUS,
- "Unable to retrieve fixlayout xattr."
- "Assume background fix layout not complete");
- goto out;
- }
-
- ret = syncop_removexattr(this, loc, GF_XATTR_TIER_LAYOUT_FIXED_KEY, NULL,
- NULL);
- if (ret) {
- gf_msg(this->name, GF_LOG_WARNING, -ret, DHT_MSG_LOG_TIER_STATUS,
- "Failed removing tier fix layout "
- "xattr from %s",
- loc->path);
- goto out;
- }
- ret = 0;
-out:
- if (dict)
- dict_unref(dict);
-}
-
-void
-gf_tier_wait_fix_lookup(gf_defrag_info_t *defrag)
-{
- if (defrag->tier_conf.tier_fix_layout_arg.thread_id) {
- pthread_join(defrag->tier_conf.tier_fix_layout_arg.thread_id, NULL);
- }
-}
-/******************Tier background Fix layout functions END********************/
-
int
dht_init_local_subvols_and_nodeuuids(xlator_t *this, dht_conf_t *conf,
loc_t *loc)
{
dict_t *dict = NULL;
- gf_defrag_info_t *defrag = NULL;
uuid_t *uuid_ptr = NULL;
int ret = -1;
int i = 0;
int j = 0;
- defrag = conf->defrag;
-
- if (defrag->cmd != GF_DEFRAG_CMD_START_TIER) {
- /* Find local subvolumes */
- ret = syncop_getxattr(this, loc, &dict, GF_REBAL_FIND_LOCAL_SUBVOL,
- NULL, NULL);
- if (ret && (ret != -ENODATA)) {
- gf_msg(this->name, GF_LOG_ERROR, -ret, 0,
- "local "
- "subvolume determination failed with error: %d",
- -ret);
- ret = -1;
- goto out;
- }
-
- if (!ret)
- goto out;
+ /* Find local subvolumes */
+ ret = syncop_getxattr(this, loc, &dict, GF_REBAL_FIND_LOCAL_SUBVOL, NULL,
+ NULL);
+ if (ret && (ret != -ENODATA)) {
+ gf_msg(this->name, GF_LOG_ERROR, -ret, 0,
+ "local "
+ "subvolume determination failed with error: %d",
+ -ret);
+ ret = -1;
+ goto out;
}
+ if (!ret)
+ goto out;
+
ret = syncop_getxattr(this, loc, &dict, GF_REBAL_OLD_FIND_LOCAL_SUBVOL,
NULL, NULL);
if (ret) {
@@ -4325,9 +4011,6 @@ dht_file_counter_thread(void *args)
struct timespec time_to_wait = {
0,
};
- struct timeval now = {
- 0,
- };
uint64_t tmp_size = 0;
if (!args)
@@ -4337,9 +4020,8 @@ dht_file_counter_thread(void *args)
dht_build_root_loc(defrag->root_inode, &root_loc);
while (defrag->defrag_status == GF_DEFRAG_STATUS_STARTED) {
- gettimeofday(&now, NULL);
- time_to_wait.tv_sec = now.tv_sec + 600;
- time_to_wait.tv_nsec = 0;
+ timespec_now(&time_to_wait);
+ time_to_wait.tv_sec += 600;
pthread_mutex_lock(&defrag->fc_mutex);
pthread_cond_timedwait(&defrag->fc_wakeup_cond, &defrag->fc_mutex,
@@ -4412,7 +4094,7 @@ gf_defrag_estimates_init(xlator_t *this, loc_t *loc, pthread_t *filecnt_thread)
goto out;
}
- ret = gf_thread_create(filecnt_thread, NULL, &dht_file_counter_thread,
+ ret = gf_thread_create(filecnt_thread, NULL, dht_file_counter_thread,
(void *)defrag, "dhtfcnt");
if (ret) {
@@ -4436,9 +4118,6 @@ gf_defrag_parallel_migration_init(xlator_t *this, gf_defrag_info_t *defrag,
int thread_spawn_count = 0;
int index = 0;
pthread_t *tid = NULL;
- char thread_name[GF_THREAD_NAMEMAX] = {
- 0,
- };
if (!defrag)
goto out;
@@ -4472,10 +4151,8 @@ gf_defrag_parallel_migration_init(xlator_t *this, gf_defrag_info_t *defrag,
/*Spawn Threads Here*/
while (index < thread_spawn_count) {
- snprintf(thread_name, sizeof(thread_name), "dhtmig%d",
- ((index + 1) & 0x3ff));
- ret = gf_thread_create(&(tid[index]), NULL, &gf_defrag_task,
- (void *)defrag, thread_name);
+ ret = gf_thread_create(&(tid[index]), NULL, gf_defrag_task,
+ (void *)defrag, "dhtmig%d", (index + 1) & 0x3ff);
if (ret != 0) {
gf_msg("DHT", GF_LOG_ERROR, ret, 0, "Thread[%d] creation failed. ",
index);
@@ -4548,7 +4225,6 @@ gf_defrag_start_crawl(void *data)
dict_t *migrate_data = NULL;
dict_t *status = NULL;
glusterfs_ctx_t *ctx = NULL;
- dht_methods_t *methods = NULL;
call_frame_t *statfs_frame = NULL;
xlator_t *old_THIS = NULL;
int ret = -1;
@@ -4564,7 +4240,6 @@ gf_defrag_start_crawl(void *data)
int thread_index = 0;
pthread_t *tid = NULL;
pthread_t filecnt_thread;
- gf_boolean_t is_tier_detach = _gf_false;
gf_boolean_t fc_thread_started = _gf_false;
this = data;
@@ -4583,7 +4258,8 @@ gf_defrag_start_crawl(void *data)
if (!defrag)
goto exit;
- gettimeofday(&defrag->start_time, NULL);
+ defrag->start_time = gf_time();
+
dht_build_root_inode(this, &defrag->root_inode);
if (!defrag->root_inode)
goto out;
@@ -4717,43 +4393,17 @@ gf_defrag_start_crawl(void *data)
}
}
- if (defrag->cmd == GF_DEFRAG_CMD_START_TIER) {
- /* Fix layout for attach tier */
- ret = gf_tier_start_fix_layout(this, &loc, defrag, fix_layout);
- if (ret) {
- goto out;
- }
-
- methods = &(conf->methods);
-
- /* Calling tier_start of tier.c */
- methods->migration_other(this, defrag);
- if (defrag->cmd == GF_DEFRAG_CMD_START_DETACH_TIER ||
- defrag->cmd == GF_DEFRAG_CMD_DETACH_START) {
- ret = dict_set_str(migrate_data, GF_XATTR_FILE_MIGRATE_KEY,
- "force");
- if (ret)
- goto out;
- }
- } else {
- ret = gf_defrag_fix_layout(this, defrag, &loc, fix_layout,
- migrate_data);
- if (ret && ret != 2) {
- defrag->total_failures++;
- ret = -1;
- goto out;
- }
-
- if (ret != 2 &&
- gf_defrag_settle_hash(this, defrag, &loc, fix_layout) != 0) {
- defrag->total_failures++;
- ret = -1;
- goto out;
- }
+ ret = gf_defrag_fix_layout(this, defrag, &loc, fix_layout, migrate_data);
+ if (ret) {
+ defrag->total_failures++;
+ ret = -1;
+ goto out;
+ }
- if (defrag->cmd == GF_DEFRAG_CMD_START_DETACH_TIER ||
- defrag->cmd == GF_DEFRAG_CMD_DETACH_START)
- is_tier_detach = _gf_true;
+ if (gf_defrag_settle_hash(this, defrag, &loc, fix_layout) != 0) {
+ defrag->total_failures++;
+ ret = -1;
+ goto out;
}
gf_log("DHT", GF_LOG_INFO, "crawling file-system completed");
@@ -4767,19 +4417,6 @@ out:
defrag->defrag_status = GF_DEFRAG_STATUS_FAILED;
}
- if (defrag->cmd == GF_DEFRAG_CMD_START_TIER) {
- /* Wait for the tier fixlayout to
- * complete if its was started.*/
- gf_tier_wait_fix_lookup(defrag);
- }
-
- if (is_tier_detach && ret == 0) {
- /* If it was a detach remove the tier fix-layout
- * xattr on root. Ignoring the failure, as nothing has to be
- * done, logging is done in gf_tier_clear_fix_layout */
- gf_tier_clear_fix_layout(this, &loc, defrag);
- }
-
gf_defrag_parallel_migration_cleanup(defrag, tid, thread_index);
if ((defrag->defrag_status != GF_DEFRAG_STATUS_STOPPED) &&
@@ -4793,9 +4430,9 @@ out:
dht_send_rebalance_event(this, defrag->cmd, defrag->defrag_status);
+ status = dict_new();
LOCK(&defrag->lock);
{
- status = dict_new();
gf_defrag_status_get(conf, status);
if (ctx && ctx->notify)
ctx->notify(GF_EN_DEFRAG_STATUS, status);
@@ -4878,9 +4515,6 @@ gf_defrag_get_estimates_based_on_size(dht_conf_t *conf)
uint64_t total_processed = 0;
uint64_t tmp_count = 0;
uint64_t time_to_complete = 0;
- struct timeval now = {
- 0,
- };
double elapsed = 0;
defrag = conf->defrag;
@@ -4888,8 +4522,7 @@ gf_defrag_get_estimates_based_on_size(dht_conf_t *conf)
if (!g_totalsize)
goto out;
- gettimeofday(&now, NULL);
- elapsed = now.tv_sec - defrag->start_time.tv_sec;
+ elapsed = gf_time() - defrag->start_time;
/* Don't calculate the estimates for the first 10 minutes.
* It is unlikely to be accurate and estimates are not required
@@ -4939,13 +4572,8 @@ gf_defrag_status_get(dht_conf_t *conf, dict_t *dict)
uint64_t lookup = 0;
uint64_t failures = 0;
uint64_t skipped = 0;
- uint64_t promoted = 0;
- uint64_t demoted = 0;
char *status = "";
double elapsed = 0;
- struct timeval end = {
- 0,
- };
uint64_t time_to_complete = 0;
uint64_t time_left = 0;
gf_defrag_info_t *defrag = conf->defrag;
@@ -4962,17 +4590,12 @@ gf_defrag_status_get(dht_conf_t *conf, dict_t *dict)
lookup = defrag->num_files_lookedup;
failures = defrag->total_failures;
skipped = defrag->skipped;
- promoted = defrag->total_files_promoted;
- demoted = defrag->total_files_demoted;
- gettimeofday(&end, NULL);
-
- elapsed = end.tv_sec - defrag->start_time.tv_sec;
+ elapsed = gf_time() - defrag->start_time;
/* The rebalance is still in progress */
- if ((defrag->cmd != GF_DEFRAG_CMD_START_TIER) &&
- (defrag->defrag_status == GF_DEFRAG_STATUS_STARTED)) {
+ if (defrag->defrag_status == GF_DEFRAG_STATUS_STARTED) {
time_to_complete = gf_defrag_get_estimates_based_on_size(conf);
if (time_to_complete && (time_to_complete > elapsed))
@@ -4987,14 +4610,6 @@ gf_defrag_status_get(dht_conf_t *conf, dict_t *dict)
if (!dict)
goto log;
- ret = dict_set_uint64(dict, "promoted", promoted);
- if (ret)
- gf_log(THIS->name, GF_LOG_WARNING, "failed to set promoted count");
-
- ret = dict_set_uint64(dict, "demoted", demoted);
- if (ret)
- gf_log(THIS->name, GF_LOG_WARNING, "failed to set demoted count");
-
ret = dict_set_uint64(dict, "files", files);
if (ret)
gf_log(THIS->name, GF_LOG_WARNING, "failed to set file count");
@@ -5060,159 +4675,6 @@ out:
return 0;
}
-void
-gf_defrag_set_pause_state(gf_tier_conf_t *tier_conf, tier_pause_state_t state)
-{
- pthread_mutex_lock(&tier_conf->pause_mutex);
- tier_conf->pause_state = state;
- pthread_mutex_unlock(&tier_conf->pause_mutex);
-}
-
-tier_pause_state_t
-gf_defrag_get_pause_state(gf_tier_conf_t *tier_conf)
-{
- int state;
-
- pthread_mutex_lock(&tier_conf->pause_mutex);
- state = tier_conf->pause_state;
- pthread_mutex_unlock(&tier_conf->pause_mutex);
-
- return state;
-}
-
-tier_pause_state_t
-gf_defrag_check_pause_tier(gf_tier_conf_t *tier_conf)
-{
- int woke = 0;
- int state = -1;
-
- pthread_mutex_lock(&tier_conf->pause_mutex);
-
- if (tier_conf->pause_state == TIER_RUNNING)
- goto out;
-
- if (tier_conf->pause_state == TIER_PAUSED)
- goto out;
-
- if (tier_conf->promote_in_progress || tier_conf->demote_in_progress)
- goto out;
-
- tier_conf->pause_state = TIER_PAUSED;
-
- if (tier_conf->pause_synctask) {
- synctask_wake(tier_conf->pause_synctask);
- tier_conf->pause_synctask = 0;
- woke = 1;
- }
-
- gf_msg("tier", GF_LOG_DEBUG, 0, DHT_MSG_TIER_PAUSED, "woken %d", woke);
-
- gf_event(EVENT_TIER_PAUSE, "vol=%s", tier_conf->volname);
-out:
- state = tier_conf->pause_state;
-
- pthread_mutex_unlock(&tier_conf->pause_mutex);
-
- return state;
-}
-
-void
-gf_defrag_pause_tier_timeout(void *data)
-{
- xlator_t *this = NULL;
- dht_conf_t *conf = NULL;
- gf_defrag_info_t *defrag = NULL;
-
- this = (xlator_t *)data;
- GF_VALIDATE_OR_GOTO("tier", this, out);
-
- conf = this->private;
- GF_VALIDATE_OR_GOTO(this->name, conf, out);
-
- defrag = conf->defrag;
- GF_VALIDATE_OR_GOTO(this->name, defrag, out);
-
- gf_msg(this->name, GF_LOG_DEBUG, 0, DHT_MSG_TIER_PAUSED,
- "Request pause timer timeout");
-
- gf_defrag_check_pause_tier(&defrag->tier_conf);
-
-out:
- return;
-}
-
-int
-gf_defrag_pause_tier(xlator_t *this, gf_defrag_info_t *defrag)
-{
- int ret = 0;
- struct timespec delta = {
- 0,
- };
- int delay = 2;
-
- if (defrag->defrag_status != GF_DEFRAG_STATUS_STARTED)
- goto out;
-
- /*
- * Set flag requesting to pause tiering. Wait 'delay' seconds for
- * tiering to actually stop as indicated by the pause state
- * before returning success or failure.
- */
- gf_defrag_set_pause_state(&defrag->tier_conf, TIER_REQUEST_PAUSE);
-
- /*
- * If migration is not underway, can pause immediately.
- */
- gf_defrag_check_pause_tier(&defrag->tier_conf);
- if (gf_defrag_get_pause_state(&defrag->tier_conf) == TIER_PAUSED)
- goto out;
-
- gf_msg(this->name, GF_LOG_DEBUG, 0, DHT_MSG_TIER_PAUSED,
- "Request pause tier");
-
- defrag->tier_conf.pause_synctask = synctask_get();
- delta.tv_sec = delay;
- delta.tv_nsec = 0;
- defrag->tier_conf.pause_timer = gf_timer_call_after(
- this->ctx, delta, gf_defrag_pause_tier_timeout, this);
-
- synctask_yield(defrag->tier_conf.pause_synctask);
-
- if (gf_defrag_get_pause_state(&defrag->tier_conf) == TIER_PAUSED)
- goto out;
-
- gf_defrag_set_pause_state(&defrag->tier_conf, TIER_RUNNING);
-
- ret = -1;
-out:
-
- gf_msg(this->name, GF_LOG_DEBUG, 0, DHT_MSG_TIER_PAUSED,
- "Pause tiering ret=%d", ret);
-
- return ret;
-}
-
-int
-gf_defrag_resume_tier(xlator_t *this, gf_defrag_info_t *defrag)
-{
- gf_msg(this->name, GF_LOG_DEBUG, 0, DHT_MSG_TIER_RESUME,
- "Pause end. Resume tiering");
-
- gf_defrag_set_pause_state(&defrag->tier_conf, TIER_RUNNING);
-
- gf_event(EVENT_TIER_RESUME, "vol=%s", defrag->tier_conf.volname);
-
- return 0;
-}
-
-int
-gf_defrag_start_detach_tier(gf_defrag_info_t *defrag)
-{
- defrag->cmd = GF_DEFRAG_CMD_START_DETACH_TIER;
-
- return 0;
-}
-
int
gf_defrag_stop(dht_conf_t *conf, gf_defrag_status_t status, dict_t *output)
{
diff --git a/xlators/cluster/dht/src/dht-rename.c b/xlators/cluster/dht/src/dht-rename.c
index af342bdbe21..d9dbf50492f 100644
--- a/xlators/cluster/dht/src/dht-rename.c
+++ b/xlators/cluster/dht/src/dht-rename.c
@@ -11,11 +11,9 @@
/* TODO: link(oldpath, newpath) fails if newpath already exists. DHT should
* delete the newpath if it gets EEXISTS from link() call.
*/
-#include "glusterfs.h"
-#include "xlator.h"
#include "dht-common.h"
#include "dht-lock.h"
-#include "defaults.h"
+#include <glusterfs/defaults.h>
int
dht_rename_unlock(call_frame_t *frame, xlator_t *this);
@@ -505,6 +503,8 @@ dht_order_rename_lock(call_frame_t *frame, loc_t **loc, xlator_t **subvol)
uuid_utoa_r(local->loc.pargfid, src);
else if (local->loc.parent)
uuid_utoa_r(local->loc.parent->gfid, src);
+ else
+ src[0] = '\0';
strcat(src, local->loc.name);
@@ -520,6 +520,8 @@ dht_order_rename_lock(call_frame_t *frame, loc_t **loc, xlator_t **subvol)
uuid_utoa_r(local->loc2.pargfid, dst);
else if (local->loc2.parent)
uuid_utoa_r(local->loc2.parent->gfid, dst);
+ else
+ dst[0] = '\0';
strcat(dst, local->loc2.name);
ret = strcmp(src, dst);
@@ -1009,9 +1011,11 @@ dht_rename_links_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
{
xlator_t *prev = NULL;
dht_local_t *local = NULL;
+ call_frame_t *main_frame = NULL;
prev = cookie;
local = frame->local;
+ main_frame = local->main_frame;
/* TODO: Handle this case in lookup-optimize */
if (op_ret == -1) {
@@ -1024,7 +1028,8 @@ dht_rename_links_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
dht_linkfile_attr_heal(frame, this);
}
- dht_rename_unlink(frame, this);
+ dht_rename_unlink(main_frame, this);
+ DHT_STACK_DESTROY(frame);
return 0;
}
@@ -1040,7 +1045,8 @@ dht_rename_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
xlator_t *src_cached = NULL;
xlator_t *dst_hashed = NULL;
xlator_t *dst_cached = NULL;
- loc_t link_loc = {0};
+ call_frame_t *link_frame = NULL;
+ dht_local_t *link_local = NULL;
local = frame->local;
prev = cookie;
@@ -1110,18 +1116,36 @@ dht_rename_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
/* Create the linkto file for the dst file */
if ((src_cached == dst_cached) && (dst_hashed != dst_cached)) {
- loc_copy(&link_loc, &local->loc2);
- if (link_loc.inode)
- inode_unref(link_loc.inode);
- link_loc.inode = inode_ref(local->loc.inode);
- gf_uuid_copy(local->gfid, local->loc.inode->gfid);
- gf_uuid_copy(link_loc.gfid, local->loc.inode->gfid);
-
- dht_linkfile_create(frame, dht_rename_links_create_cbk, this,
- src_cached, dst_hashed, &link_loc);
+ link_frame = copy_frame(frame);
+ if (!link_frame) {
+ goto unlink;
+ }
+
+ /* fop value sent as maxvalue because it is not used
+ * anywhere in this case */
+ link_local = dht_local_init(link_frame, &local->loc2, NULL,
+ GF_FOP_MAXVALUE);
+ if (!link_local) {
+ goto unlink;
+ }
+
+ if (link_local->loc.inode)
+ inode_unref(link_local->loc.inode);
+ link_local->loc.inode = inode_ref(local->loc.inode);
+ link_local->main_frame = frame;
+ link_local->stbuf = local->stbuf;
+ gf_uuid_copy(link_local->gfid, local->loc.inode->gfid);
+
+ dht_linkfile_create(link_frame, dht_rename_links_create_cbk, this,
+ src_cached, dst_hashed, &link_local->loc);
return 0;
}
+unlink:
+
+ if (link_frame) {
+ DHT_STACK_DESTROY(link_frame);
+ }
dht_rename_unlink(frame, this);
return 0;
diff --git a/xlators/cluster/dht/src/dht-selfheal.c b/xlators/cluster/dht/src/dht-selfheal.c
index 0e57eab5f7f..3e24065227c 100644
--- a/xlators/cluster/dht/src/dht-selfheal.c
+++ b/xlators/cluster/dht/src/dht-selfheal.c
@@ -8,12 +8,7 @@
cases as published by the Free Software Foundation.
*/
-#include "glusterfs.h"
-#include "xlator.h"
-#include "dht-common.h"
-#include "dht-messages.h"
#include "dht-lock.h"
-#include "glusterfs-acl.h"
#define DHT_SET_LAYOUT_RANGE(layout, i, srt, chunk, path) \
do { \
@@ -22,7 +17,7 @@
layout->list[i].commit_hash = layout->commit_hash; \
\
gf_msg_trace(this->name, 0, \
- "gave fix: %u - %u, with commit-hash %u" \
+ "gave fix: 0x%x - 0x%x, with commit-hash 0x%x" \
" on %s for %s", \
layout->list[i].start, layout->list[i].stop, \
layout->list[i].commit_hash, \
@@ -38,7 +33,7 @@
} \
} while (0)
-int
+static int
dht_selfheal_layout_lock(call_frame_t *frame, dht_layout_t *layout,
gf_boolean_t newdir, dht_selfheal_layout_t healer,
dht_need_heal_t should_heal);
@@ -149,8 +144,8 @@ dht_refresh_layout_done(call_frame_t *frame)
ret = dht_layout_sort(refreshed);
if (ret == -1) {
- gf_msg(frame->this->name, GF_LOG_WARNING, 0, DHT_MSG_LAYOUT_SORT_FAILED,
- "sorting the layout failed");
+ gf_smsg(frame->this->name, GF_LOG_WARNING, 0,
+ DHT_MSG_LAYOUT_SORT_FAILED, NULL);
goto err;
}
@@ -206,10 +201,9 @@ dht_refresh_layout_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
if (op_ret == -1) {
gf_uuid_unparse(local->loc.gfid, gfid);
local->op_errno = op_errno;
- gf_msg(this->name, GF_LOG_ERROR, op_errno,
- DHT_MSG_FILE_LOOKUP_FAILED,
- "lookup of %s on %s returned error, gfid: %s",
- local->loc.path, prev->name, gfid);
+ gf_smsg(this->name, GF_LOG_ERROR, op_errno,
+ DHT_MSG_FILE_LOOKUP_FAILED, "path=%s", local->loc.path,
+ "name=%s", prev->name, "gfid=%s", gfid, NULL);
goto unlock;
}
@@ -270,9 +264,8 @@ dht_refresh_layout(call_frame_t *frame)
conf->subvolume_cnt);
if (!local->selfheal.refreshed_layout) {
gf_uuid_unparse(local->loc.gfid, gfid);
- gf_msg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_NO_MEMORY,
- "mem allocation for layout failed, path:%s gfid:%s",
- local->loc.path, gfid);
+ gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_MEM_ALLOC_FAILED,
+ "path=%s", local->loc.path, "gfid=%s", gfid, NULL);
goto out;
}
@@ -284,9 +277,8 @@ dht_refresh_layout(call_frame_t *frame)
gf_uuid_unparse(local->loc.gfid, gfid);
local->xattr_req = dict_new();
if (local->xattr_req == NULL) {
- gf_msg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_NO_MEMORY,
- "dict mem allocation failed, path:%s gfid:%s",
- local->loc.path, gfid);
+ gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_NO_MEMORY,
+ "path=%s", local->loc.path, "gfid=%s", gfid, NULL);
goto out;
}
}
@@ -294,9 +286,9 @@ dht_refresh_layout(call_frame_t *frame)
if (dict_get(local->xattr_req, conf->xattr_name) == 0) {
ret = dict_set_uint32(local->xattr_req, conf->xattr_name, 4 * 4);
if (ret)
- gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DICT_SET_FAILED,
- "%s: Failed to set dictionary value:key = %s",
- local->loc.path, conf->xattr_name);
+ gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DICT_SET_FAILED,
+ "path=%s", local->loc.path, "key=%s", conf->xattr_name,
+ NULL);
}
for (i = 0; i < call_cnt; i++) {
@@ -529,7 +521,7 @@ out:
return fixit;
}
-int
+static int
dht_selfheal_layout_lock(call_frame_t *frame, dht_layout_t *layout,
gf_boolean_t newdir, dht_selfheal_layout_t healer,
dht_need_heal_t should_heal)
@@ -561,10 +553,8 @@ dht_selfheal_layout_lock(call_frame_t *frame, dht_layout_t *layout,
lk_array = GF_CALLOC(count, sizeof(*lk_array), gf_common_mt_char);
if (lk_array == NULL) {
gf_uuid_unparse(local->stbuf.ia_gfid, gfid);
- gf_msg("dht", GF_LOG_ERROR, ENOMEM, DHT_MSG_NO_MEMORY,
- "mem allocation failed for "
- "lk_array, gfid:%s path: %s",
- gfid, local->loc.path);
+ gf_smsg("dht", GF_LOG_ERROR, ENOMEM, DHT_MSG_MEM_ALLOC_FAILED,
+ "lk_array-gfid=%s", gfid, "path=%s", local->loc.path, NULL);
goto err;
}
@@ -574,10 +564,9 @@ dht_selfheal_layout_lock(call_frame_t *frame, dht_layout_t *layout,
DHT_LAYOUT_HEAL_DOMAIN, NULL, FAIL_ON_ANY_ERROR);
if (lk_array[i] == NULL) {
gf_uuid_unparse(local->stbuf.ia_gfid, gfid);
- gf_msg(THIS->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_NO_MEMORY,
- "mem allocation "
- "failed for lk_array, gfid:%s path:%s",
- gfid, local->loc.path);
+ gf_smsg(THIS->name, GF_LOG_ERROR, ENOMEM,
+ DHT_MSG_MEM_ALLOC_FAILED, "lk_array-gfid=%s", gfid,
+ "path=%s", local->loc.path, NULL);
goto err;
}
}
@@ -586,10 +575,8 @@ dht_selfheal_layout_lock(call_frame_t *frame, dht_layout_t *layout,
lk_array = GF_CALLOC(count, sizeof(*lk_array), gf_common_mt_char);
if (lk_array == NULL) {
gf_uuid_unparse(local->stbuf.ia_gfid, gfid);
- gf_msg(THIS->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_NO_MEMORY,
- "mem allocation failed for "
- "lk_array, gfid:%s path:%s",
- gfid, local->loc.path);
+ gf_smsg(THIS->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_MEM_ALLOC_FAILED,
+ "lk_array-gfid=%s", gfid, "path=%s", local->loc.path, NULL);
goto err;
}
@@ -598,10 +585,8 @@ dht_selfheal_layout_lock(call_frame_t *frame, dht_layout_t *layout,
NULL, FAIL_ON_ANY_ERROR);
if (lk_array[0] == NULL) {
gf_uuid_unparse(local->stbuf.ia_gfid, gfid);
- gf_msg(THIS->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_NO_MEMORY,
- "mem allocation failed for "
- "lk_array, gfid:%s path:%s",
- gfid, local->loc.path);
+ gf_smsg(THIS->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_MEM_ALLOC_FAILED,
+ "lk_array-gfid=%s", gfid, "path=%s", local->loc.path, NULL);
goto err;
}
}
@@ -627,7 +612,7 @@ err:
return -1;
}
-int
+static int
dht_selfheal_dir_xattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno, dict_t *xdata)
{
@@ -649,10 +634,9 @@ dht_selfheal_dir_xattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
err = 0;
} else {
gf_uuid_unparse(local->loc.gfid, gfid);
- gf_msg(this->name, GF_LOG_ERROR, op_errno,
- DHT_MSG_DIR_SELFHEAL_XATTR_FAILED,
- "layout setxattr failed on %s, path:%s gfid:%s", subvol->name,
- local->loc.path, gfid);
+ gf_smsg(this->name, GF_LOG_ERROR, op_errno,
+ DHT_MSG_DIR_SELFHEAL_XATTR_FAILED, "name=%s", subvol->name,
+ "path=%s", local->loc.path, "gfid=%s", gfid, NULL);
err = op_errno;
}
@@ -699,7 +683,7 @@ dht_set_user_xattr(dict_t *dict, char *k, data_t *v, void *data)
return ret;
}
-int
+static int
dht_selfheal_dir_xattr_persubvol(call_frame_t *frame, loc_t *loc,
dht_layout_t *layout, int i,
xlator_t *req_subvol)
@@ -741,19 +725,17 @@ dht_selfheal_dir_xattr_persubvol(call_frame_t *frame, loc_t *loc,
ret = dict_set_str(xdata, GLUSTERFS_INTERNAL_FOP_KEY, "yes");
if (ret < 0) {
- gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DICT_SET_FAILED,
- "%s: Failed to set dictionary value: key = %s,"
- " gfid = %s",
- loc->path, GLUSTERFS_INTERNAL_FOP_KEY, gfid);
+ gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DICT_SET_FAILED,
+ "path=%s", loc->path, "key=%s", GLUSTERFS_INTERNAL_FOP_KEY,
+ "gfid=%s", gfid, NULL);
goto err;
}
ret = dict_set_int8(xdata, DHT_IATT_IN_XDATA_KEY, 1);
if (ret < 0) {
- gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DICT_SET_FAILED,
- "%s: Failed to set dictionary value: key = %s,"
- " gfid = %s",
- loc->path, DHT_IATT_IN_XDATA_KEY, gfid);
+ gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DICT_SET_FAILED,
+ "path=%s", loc->path, "key=%s", DHT_IATT_IN_XDATA_KEY,
+ "gfid=%s", gfid, NULL);
goto err;
}
@@ -761,27 +743,27 @@ dht_selfheal_dir_xattr_persubvol(call_frame_t *frame, loc_t *loc,
ret = dht_disk_layout_extract(this, layout, i, &disk_layout);
if (ret == -1) {
- gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DIR_SELFHEAL_XATTR_FAILED,
- "Directory self heal xattr failed:"
- " %s: (subvol %s) Failed to extract disk layout,"
- " gfid = %s",
- loc->path, subvol->name, gfid);
+ gf_smsg(this->name, GF_LOG_WARNING, 0,
+ DHT_MSG_DIR_SELFHEAL_XATTR_FAILED,
+ "extract-disk-layout-failed, path=%s", loc->path, "subvol=%s",
+ subvol->name, "gfid=%s", gfid, NULL);
goto err;
}
ret = dict_set_bin(xattr, conf->xattr_name, disk_layout, 4 * 4);
if (ret == -1) {
- gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DIR_SELFHEAL_XATTR_FAILED,
- "Directory self heal xattr failed:"
- "%s: (subvol %s) Failed to set xattr dictionary,"
- " gfid = %s",
- loc->path, subvol->name, gfid);
+ gf_smsg(this->name, GF_LOG_WARNING, 0,
+ DHT_MSG_DIR_SELFHEAL_XATTR_FAILED, "path=%s", loc->path,
+ "subvol=%s", subvol->name,
+ "set-xattr-dictionary-failed"
+ "gfid=%s",
+ gfid, NULL);
goto err;
}
disk_layout = NULL;
gf_msg_trace(this->name, 0,
- "setting hash range %u - %u (type %d) on subvolume %s"
+ "setting hash range 0x%x - 0x%x (type %d) on subvolume %s"
" for %s",
layout->list[i].start, layout->list[i].stop, layout->type,
subvol->name, loc->path);
@@ -791,20 +773,17 @@ dht_selfheal_dir_xattr_persubvol(call_frame_t *frame, loc_t *loc,
if (data) {
ret = dict_add(xattr, QUOTA_LIMIT_KEY, data);
if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED,
- "%s: Failed to set dictionary value:"
- " key = %s",
- loc->path, QUOTA_LIMIT_KEY);
+ gf_smsg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED,
+ "path=%s", loc->path, "key=%s", QUOTA_LIMIT_KEY, NULL);
}
}
data = dict_get(local->xattr, QUOTA_LIMIT_OBJECTS_KEY);
if (data) {
ret = dict_add(xattr, QUOTA_LIMIT_OBJECTS_KEY, data);
if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED,
- "%s: Failed to set dictionary value:"
- " key = %s",
- loc->path, QUOTA_LIMIT_OBJECTS_KEY);
+ gf_smsg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED,
+ "path=%s", loc->path, "key=%s", QUOTA_LIMIT_OBJECTS_KEY,
+ NULL);
}
}
}
@@ -833,7 +812,7 @@ err:
return 0;
}
-int
+static int
dht_fix_dir_xattr(call_frame_t *frame, loc_t *loc, dht_layout_t *layout)
{
dht_local_t *local = NULL;
@@ -882,7 +861,7 @@ out:
return 0;
}
-int
+static int
dht_selfheal_dir_xattr(call_frame_t *frame, loc_t *loc, dht_layout_t *layout)
{
dht_local_t *local = NULL;
@@ -942,9 +921,8 @@ dht_selfheal_dir_xattr(call_frame_t *frame, loc_t *loc, dht_layout_t *layout)
dummy = dht_layout_new(this, 1);
if (!dummy) {
gf_uuid_unparse(loc->gfid, gfid);
- gf_msg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_NO_MEMORY,
- "failed to allocate dummy layout, path:%s gfid:%s", loc->path,
- gfid);
+ gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_DUMMY_ALLOC_FAILED,
+ "path=%s", loc->path, "gfid=%s", gfid, NULL);
goto out;
}
for (i = 0; i < conf->subvolume_cnt && missing_xattr; i++) {
@@ -960,38 +938,6 @@ out:
return 0;
}
-gf_boolean_t
-dht_is_subvol_part_of_layout(dht_layout_t *layout, xlator_t *xlator)
-{
- int i = 0;
- gf_boolean_t ret = _gf_false;
-
- for (i = 0; i < layout->cnt; i++) {
- if (!strcmp(layout->list[i].xlator->name, xlator->name)) {
- ret = _gf_true;
- break;
- }
- }
-
- return ret;
-}
-
-int
-dht_layout_index_from_conf(dht_layout_t *layout, xlator_t *xlator)
-{
- int i = -1;
- int j = 0;
-
- for (j = 0; j < layout->cnt; j++) {
- if (!strcmp(layout->list[j].xlator->name, xlator->name)) {
- i = j;
- break;
- }
- }
-
- return i;
-}
-
int
dht_selfheal_dir_setattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno, struct iatt *statpre,
@@ -1033,18 +979,27 @@ dht_selfheal_dir_setattr(call_frame_t *frame, loc_t *loc, struct iatt *stbuf,
int missing_attr = 0;
int i = 0, ret = -1;
dht_local_t *local = NULL;
+ dht_conf_t *conf = NULL;
xlator_t *this = NULL;
int cnt = 0;
local = frame->local;
this = frame->this;
+ conf = this->private;
+
+ /* We need to heal the attrs if:
+ * 1. Any directories were missing - the newly created dirs will need
+ * to have the correct attrs set
+ * 2. An existing dir does not have the correct permissions - they may
+ * have been changed when a brick was down.
+ */
for (i = 0; i < layout->cnt; i++) {
if (layout->list[i].err == -1)
missing_attr++;
}
- if (missing_attr == 0) {
+ if ((missing_attr == 0) && (local->need_attrheal == 0)) {
if (!local->heal_layout) {
gf_msg_trace(this->name, 0, "Skip heal layout for %s gfid = %s ",
loc->path, uuid_utoa(loc->gfid));
@@ -1062,25 +1017,18 @@ dht_selfheal_dir_setattr(call_frame_t *frame, loc_t *loc, struct iatt *stbuf,
return 0;
}
- local->call_cnt = missing_attr;
- cnt = layout->cnt;
+ cnt = local->call_cnt = conf->subvolume_cnt;
for (i = 0; i < cnt; i++) {
- if (layout->list[i].err == -1) {
- gf_msg_trace(this->name, 0, "%s: setattr on subvol %s, gfid = %s",
- loc->path, layout->list[i].xlator->name,
- uuid_utoa(loc->gfid));
-
- STACK_WIND(
- frame, dht_selfheal_dir_setattr_cbk, layout->list[i].xlator,
- layout->list[i].xlator->fops->setattr, loc, stbuf, valid, NULL);
- }
+ STACK_WIND(frame, dht_selfheal_dir_setattr_cbk, layout->list[i].xlator,
+ layout->list[i].xlator->fops->setattr, loc, stbuf, valid,
+ NULL);
}
return 0;
}
-int
+static int
dht_selfheal_dir_mkdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno, inode_t *inode,
struct iatt *stbuf, struct iatt *preparent,
@@ -1110,11 +1058,10 @@ dht_selfheal_dir_mkdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
if (op_ret) {
gf_uuid_unparse(local->loc.gfid, gfid);
- gf_msg(this->name,
- ((op_errno == EEXIST) ? GF_LOG_DEBUG : GF_LOG_WARNING), op_errno,
- DHT_MSG_DIR_SELFHEAL_FAILED,
- "Directory selfheal failed: path = %s, gfid = %s",
- local->loc.path, gfid);
+ gf_smsg(this->name,
+ ((op_errno == EEXIST) ? GF_LOG_DEBUG : GF_LOG_WARNING),
+ op_errno, DHT_MSG_DIR_SELFHEAL_FAILED, "path=%s",
+ local->loc.path, "gfid=%s", gfid, NULL);
goto out;
}
dht_iatt_merge(this, &local->preparent, preparent);
@@ -1133,89 +1080,7 @@ out:
return 0;
}
-void
-dht_selfheal_dir_mkdir_setacl(dict_t *xattr, dict_t *dict)
-{
- data_t *acl_default = NULL;
- data_t *acl_access = NULL;
- xlator_t *this = NULL;
- int ret = -1;
-
- GF_ASSERT(xattr);
- GF_ASSERT(dict);
-
- this = THIS;
- GF_ASSERT(this);
-
- acl_default = dict_get(xattr, POSIX_ACL_DEFAULT_XATTR);
-
- if (!acl_default) {
- gf_msg_debug(this->name, 0, "ACL_DEFAULT xattr not present");
- goto cont;
- }
- ret = dict_set(dict, POSIX_ACL_DEFAULT_XATTR, acl_default);
- if (ret)
- gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DICT_SET_FAILED,
- "Failed to set dictionary value.key = %s",
- POSIX_ACL_DEFAULT_XATTR);
-cont:
- acl_access = dict_get(xattr, POSIX_ACL_ACCESS_XATTR);
- if (!acl_access) {
- gf_msg_debug(this->name, 0, "ACL_ACCESS xattr not present");
- goto out;
- }
- ret = dict_set(dict, POSIX_ACL_ACCESS_XATTR, acl_access);
- if (ret)
- gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DICT_SET_FAILED,
- "Failed to set dictionary value.key = %s",
- POSIX_ACL_ACCESS_XATTR);
-
-out:
- return;
-}
-
-void
-dht_selfheal_dir_mkdir_setquota(dict_t *src, dict_t *dst)
-{
- data_t *quota_limit_key = NULL;
- data_t *quota_limit_obj_key = NULL;
- xlator_t *this = NULL;
- int ret = -1;
-
- GF_ASSERT(src);
- GF_ASSERT(dst);
-
- this = THIS;
- GF_ASSERT(this);
-
- quota_limit_key = dict_get(src, QUOTA_LIMIT_KEY);
- if (!quota_limit_key) {
- gf_msg_debug(this->name, 0, "QUOTA_LIMIT_KEY xattr not present");
- goto cont;
- }
- ret = dict_set(dst, QUOTA_LIMIT_KEY, quota_limit_key);
- if (ret)
- gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DICT_SET_FAILED,
- "Failed to set dictionary value.key = %s", QUOTA_LIMIT_KEY);
-
-cont:
- quota_limit_obj_key = dict_get(src, QUOTA_LIMIT_OBJECTS_KEY);
- if (!quota_limit_obj_key) {
- gf_msg_debug(this->name, 0,
- "QUOTA_LIMIT_OBJECTS_KEY xattr not present");
- goto out;
- }
- ret = dict_set(dst, QUOTA_LIMIT_OBJECTS_KEY, quota_limit_obj_key);
- if (ret)
- gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DICT_SET_FAILED,
- "Failed to set dictionary value.key = %s",
- QUOTA_LIMIT_OBJECTS_KEY);
-
-out:
- return;
-}
-
-int
+static int
dht_selfheal_dir_mkdir_lookup_done(call_frame_t *frame, xlator_t *this)
{
dht_local_t *local = NULL;
@@ -1239,10 +1104,8 @@ dht_selfheal_dir_mkdir_lookup_done(call_frame_t *frame, xlator_t *this)
ret = dict_set_gfuuid(dict, "gfid-req", local->gfid, true);
if (ret)
- gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DICT_SET_FAILED,
- "%s: Failed to set dictionary value:"
- " key = gfid-req",
- loc->path);
+ gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DICT_SET_FAILED,
+ "path=%s", loc->path, "key=gfid-req", NULL);
} else if (local->params) {
/* Send the dictionary from higher layers directly */
@@ -1254,18 +1117,15 @@ dht_selfheal_dir_mkdir_lookup_done(call_frame_t *frame, xlator_t *this)
dht_dir_set_heal_xattr(this, local, dict, local->xattr, NULL, NULL);
if (!dict) {
- gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DICT_SET_FAILED,
- "dict is NULL, need to make sure gfids are same");
+ gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DICT_IS_NULL, NULL);
dict = dict_new();
if (!dict)
return -1;
}
ret = dict_set_flag(dict, GF_INTERNAL_CTX_KEY, GF_DHT_HEAL_DIR);
if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED,
- "Failed to set dictionary value for"
- " key = %s at path: %s",
- GF_INTERNAL_CTX_KEY, loc->path);
+ gf_smsg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED, "key=%s",
+ GF_INTERNAL_CTX_KEY, "path=%s", loc->path, NULL);
/* We can still continue. As heal can still happen
* unless quota limits have reached for the dir.
*/
@@ -1297,7 +1157,7 @@ err:
return 0;
}
-int
+static int
dht_selfheal_dir_mkdir_lookup_cbk(call_frame_t *frame, void *cookie,
xlator_t *this, int op_ret, int op_errno,
inode_t *inode, struct iatt *stbuf,
@@ -1308,45 +1168,54 @@ dht_selfheal_dir_mkdir_lookup_cbk(call_frame_t *frame, void *cookie,
int this_call_cnt = 0;
int missing_dirs = 0;
dht_layout_t *layout = NULL;
- dht_conf_t *conf = 0;
+ xlator_t *prev = 0;
loc_t *loc = NULL;
- int check_mds = 0;
- int errst = 0;
- int32_t mds_xattr_val[1] = {0};
char gfid_local[GF_UUID_BUF_SIZE] = {0};
+ int index = -1;
VALIDATE_OR_GOTO(this->private, err);
local = frame->local;
layout = local->layout;
loc = &local->loc;
- conf = this->private;
+ prev = cookie;
- if (local->gfid)
+ if (!gf_uuid_is_null(local->gfid))
gf_uuid_unparse(local->gfid, gfid_local);
- this_call_cnt = dht_frame_return(frame);
-
LOCK(&frame->lock);
{
+ index = dht_layout_index_for_subvol(layout, prev);
if ((op_ret < 0) && (op_errno == ENOENT || op_errno == ESTALE)) {
local->selfheal.hole_cnt = !local->selfheal.hole_cnt
? 1
: local->selfheal.hole_cnt + 1;
+ /* the status might have changed. Update the layout with the
+ * new status
+ */
+ if (index >= 0) {
+ layout->list[index].err = op_errno;
+ }
}
if (!op_ret) {
dht_iatt_merge(this, &local->stbuf, stbuf);
- }
- check_mds = dht_dict_get_array(xattr, conf->mds_xattr_key,
- mds_xattr_val, 1, &errst);
- if (dict_get(xattr, conf->mds_xattr_key) && check_mds && !errst) {
- dict_unref(local->xattr);
- local->xattr = dict_ref(xattr);
+ if (prev == local->mds_subvol) {
+ dict_unref(local->xattr);
+ local->xattr = dict_ref(xattr);
+ }
+ /* the status might have changed. Update the layout with the
+ * new status
+ */
+ if (index >= 0) {
+ layout->list[index].err = -1;
+ }
}
}
UNLOCK(&frame->lock);
+ this_call_cnt = dht_frame_return(frame);
+
if (is_last_call(this_call_cnt)) {
if (local->selfheal.hole_cnt == layout->cnt) {
gf_msg_debug(this->name, op_errno,
@@ -1382,7 +1251,7 @@ err:
return 0;
}
-int
+static int
dht_selfheal_dir_mkdir_lock_cbk(call_frame_t *frame, void *cookie,
xlator_t *this, int32_t op_ret,
int32_t op_errno, dict_t *xdata)
@@ -1402,19 +1271,14 @@ dht_selfheal_dir_mkdir_lock_cbk(call_frame_t *frame, void *cookie,
local->call_cnt = conf->subvolume_cnt;
if (op_ret < 0) {
- /* We get this error when the directory entry was not created
- * on a newky attached tier subvol. Hence proceed and do mkdir
- * on the tier subvol.
- */
if (op_errno == EINVAL) {
local->call_cnt = 1;
dht_selfheal_dir_mkdir_lookup_done(frame, this);
return 0;
}
- gf_msg(this->name, GF_LOG_WARNING, op_errno, DHT_MSG_ENTRYLK_ERROR,
- "acquiring entrylk after inodelk failed for %s",
- local->loc.path);
+ gf_smsg(this->name, GF_LOG_WARNING, op_errno, DHT_MSG_ENTRYLK_ERROR,
+ "path=%s", local->loc.path, NULL);
local->op_errno = op_errno;
goto err;
@@ -1428,10 +1292,8 @@ dht_selfheal_dir_mkdir_lock_cbk(call_frame_t *frame, void *cookie,
ret = dict_set_int32(local->xattr_req, "list-xattr", 1);
if (ret)
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED,
- "Failed to set dictionary key list-xattr value "
- " for path %s ",
- local->loc.path);
+ gf_smsg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED, "path=%s",
+ local->loc.path, NULL);
for (i = 0; i < conf->subvolume_cnt; i++) {
if (mds_subvol && conf->subvolumes[i] == mds_subvol) {
@@ -1454,18 +1316,21 @@ err:
return 0;
}
-int
+static int
dht_selfheal_dir_mkdir(call_frame_t *frame, loc_t *loc, dht_layout_t *layout,
int force)
{
int missing_dirs = 0;
int i = 0;
+ int op_errno = 0;
int ret = -1;
dht_local_t *local = NULL;
xlator_t *this = NULL;
+ dht_conf_t *conf = NULL;
local = frame->local;
this = frame->this;
+ conf = this->private;
local->selfheal.force_mkdir = force;
local->selfheal.hole_cnt = 0;
@@ -1476,16 +1341,18 @@ dht_selfheal_dir_mkdir(call_frame_t *frame, loc_t *loc, dht_layout_t *layout,
}
if (missing_dirs == 0) {
+ /* We don't need to create any directories. Proceed to heal the
+ * attrs and xattrs
+ */
if (!__is_root_gfid(local->stbuf.ia_gfid)) {
if (local->need_xattr_heal) {
local->need_xattr_heal = 0;
- ret = dht_dir_xattr_heal(this, local);
- if (ret)
- gf_msg(this->name, GF_LOG_ERROR, ret,
- DHT_MSG_DIR_XATTR_HEAL_FAILED,
- "xattr heal failed for "
- "directory %s gfid %s ",
- local->loc.path, local->gfid);
+ ret = dht_dir_xattr_heal(this, local, &op_errno);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, op_errno,
+ DHT_MSG_DIR_XATTR_HEAL_FAILED, "path=%s",
+ local->loc.path, "gfid=%s", local->gfid, NULL);
+ }
} else {
if (!gf_uuid_is_null(local->gfid))
gf_uuid_copy(loc->gfid, local->gfid);
@@ -1494,28 +1361,53 @@ dht_selfheal_dir_mkdir(call_frame_t *frame, loc_t *loc, dht_layout_t *layout,
if (!ret)
return 0;
- gf_msg(this->name, GF_LOG_INFO, 0,
- DHT_MSG_DIR_XATTR_HEAL_FAILED,
- "Failed to set mds xattr "
- "for directory %s gfid %s ",
- local->loc.path, local->gfid);
+ gf_smsg(this->name, GF_LOG_INFO, 0, DHT_MSG_SET_XATTR_FAILED,
+ "path=%s", local->loc.path, "gfid=%s", local->gfid,
+ NULL);
}
}
dht_selfheal_dir_setattr(frame, loc, &local->stbuf, 0xffffffff, layout);
return 0;
}
- if (local->hashed_subvol == NULL)
- local->hashed_subvol = dht_subvol_get_hashed(this, loc);
+ /* The MDS xattr is populated only when DHT has more than one
+ subvol. After a graph switch that adds more dht subvols, treat the
+ hashed subvol as the MDS to avoid an MDS check failure when a fop
+ is run on the directory.
+ */
+ if (!dict_get(local->xattr, conf->mds_xattr_key) &&
+ (conf->subvolume_cnt > 1)) {
+ if (local->hashed_subvol == NULL) {
+ local->hashed_subvol = dht_subvol_get_hashed(this, loc);
+ if (local->hashed_subvol == NULL) {
+ local->op_errno = EINVAL;
+ gf_smsg(this->name, GF_LOG_WARNING, local->op_errno,
+ DHT_MSG_HASHED_SUBVOL_GET_FAILED, "gfid=%s",
+ loc->pargfid, "name=%s", loc->name, "path=%s",
+ loc->path, NULL);
+ goto err;
+ }
+ }
+ ret = dht_inode_ctx_mdsvol_set(local->inode, this,
+ local->hashed_subvol);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_SET_INODE_CTX_FAILED,
+ "Failed to set hashed subvol for %s on inode vol is %s",
+ local->loc.path,
+ local->hashed_subvol ? local->hashed_subvol->name : "NULL");
+ goto err;
+ }
+ }
if (local->hashed_subvol == NULL) {
- local->op_errno = EINVAL;
- gf_msg(this->name, GF_LOG_WARNING, local->op_errno,
- DHT_MSG_HASHED_SUBVOL_GET_FAILED,
- "(%s/%s) (path: %s): "
- "hashed subvolume not found",
- loc->pargfid, loc->name, loc->path);
- goto err;
+ local->hashed_subvol = dht_subvol_get_hashed(this, loc);
+ if (local->hashed_subvol == NULL) {
+ local->op_errno = EINVAL;
+ gf_smsg(this->name, GF_LOG_WARNING, local->op_errno,
+ DHT_MSG_HASHED_SUBVOL_GET_FAILED, "gfid=%s", loc->pargfid,
+ "name=%s", loc->name, "path=%s", loc->path, NULL);
+ goto err;
+ }
}
local->current = &local->lock[0];
@@ -1531,7 +1423,7 @@ err:
return -1;
}
-int
+static int
dht_selfheal_layout_alloc_start(xlator_t *this, loc_t *loc,
dht_layout_t *layout)
{
@@ -1627,7 +1519,7 @@ dht_get_layout_count(xlator_t *this, dht_layout_t *layout, int new_layout)
/* if layout->spread_cnt is set, check if it is <= available
* subvolumes (down brick and decommissioned bricks are considered
- * un-availbale). Else return count (available up bricks) */
+ * un-available). Else return count (available up bricks) */
count = ((layout->spread_cnt && (layout->spread_cnt <= count))
? layout->spread_cnt
: ((count) ? count : 1));
@@ -1640,8 +1532,6 @@ dht_selfheal_layout_new_directory(call_frame_t *frame, loc_t *loc,
dht_layout_t *new_layout);
void
-dht_layout_entry_swap(dht_layout_t *layout, int i, int j);
-void
dht_layout_range_swap(dht_layout_t *layout, int i, int j);
/*
@@ -1650,7 +1540,7 @@ dht_layout_range_swap(dht_layout_t *layout, int i, int j);
*/
#define OV_ENTRY(x, y) table[x * new->cnt + y]
-void
+static void
dht_selfheal_layout_maximize_overlap(call_frame_t *frame, loc_t *loc,
dht_layout_t *new, dht_layout_t *old)
{
@@ -1727,7 +1617,7 @@ dht_selfheal_layout_maximize_overlap(call_frame_t *frame, loc_t *loc,
}
}
-dht_layout_t *
+static dht_layout_t *
dht_fix_layout_of_directory(call_frame_t *frame, loc_t *loc,
dht_layout_t *layout)
{
@@ -1752,9 +1642,8 @@ dht_fix_layout_of_directory(call_frame_t *frame, loc_t *loc,
new_layout = dht_layout_new(this, priv->subvolume_cnt);
if (!new_layout) {
gf_uuid_unparse(loc->gfid, gfid);
- gf_msg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_NO_MEMORY,
- "mem allocation failed for new_layout, path:%s gfid:%s",
- loc->path, gfid);
+ gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_MEM_ALLOC_FAILED,
+ "new_layout, path=%s", loc->path, "gfid=%s", gfid, NULL);
goto done;
}
@@ -1764,10 +1653,9 @@ dht_fix_layout_of_directory(call_frame_t *frame, loc_t *loc,
if (subvol_down) {
gf_uuid_unparse(loc->gfid, gfid);
- gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_LAYOUT_FIX_FAILED,
- "Layout fix failed: %u subvolume(s) are down"
- ". Skipping fix layout. path:%s gfid:%s",
- subvol_down, loc->path, gfid);
+ gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_LAYOUT_FIX_FAILED,
+ "subvol-down=%u", subvol_down, "Skipping-fix-layout", "path=%s",
+ loc->path, "gfid=%s", gfid, NULL);
GF_FREE(new_layout);
return NULL;
}
@@ -1785,10 +1673,10 @@ dht_fix_layout_of_directory(call_frame_t *frame, loc_t *loc,
if (priv->du_stats) {
for (i = 0; i < priv->subvolume_cnt; ++i) {
- gf_msg(this->name, GF_LOG_DEBUG, 0, DHT_MSG_SUBVOL_INFO,
- "subvolume %d (%s): %u chunks, path:%s", i,
- priv->subvolumes[i]->name, priv->du_stats[i].chunks,
- loc->path);
+ gf_smsg(this->name, GF_LOG_DEBUG, 0, DHT_MSG_SUBVOL_INFO,
+ "index=%d", i, "name=%s", priv->subvolumes[i]->name,
+ "chunks=%u", priv->du_stats[i].chunks, "path=%s", loc->path,
+ NULL);
/* Maximize overlap if the bricks are all the same
* size.
@@ -1800,8 +1688,8 @@ dht_fix_layout_of_directory(call_frame_t *frame, loc_t *loc,
}
}
} else {
- gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_NO_DISK_USAGE_STATUS,
- "no du stats ?!?");
+ gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_NO_DISK_USAGE_STATUS,
+ NULL);
}
/* First give it a layout as though it is a new directory. This
@@ -1832,7 +1720,7 @@ done:
* Having to call this 2x for each entry in the layout is pretty horrible, but
* that's what all of this layout-sorting nonsense gets us.
*/
-uint32_t
+static uint32_t
dht_get_chunks_from_xl(xlator_t *parent, xlator_t *child)
{
dht_conf_t *priv = parent->private;
@@ -1950,7 +1838,7 @@ done:
return;
}
-int
+static int
dht_selfheal_dir_getafix(call_frame_t *frame, loc_t *loc, dht_layout_t *layout)
{
dht_local_t *local = NULL;
@@ -2009,9 +1897,8 @@ dht_selfheal_new_directory(call_frame_t *frame, dht_selfheal_dir_cbk_t dir_cbk,
linked_inode = inode_link(loc->inode, loc->parent, loc->name,
&local->stbuf);
if (!linked_inode) {
- gf_msg(frame->this->name, GF_LOG_WARNING, 0,
- DHT_MSG_DIR_SELFHEAL_FAILED,
- "linking inode failed (%s/%s) => %s", pgfid, loc->name, gfid);
+ gf_smsg(frame->this->name, GF_LOG_WARNING, 0, DHT_MSG_LINK_INODE_FAILED,
+ "pgfid=%s", pgfid, "name=%s", loc->name, "gfid=%s", gfid, NULL);
ret = -1;
goto out;
}
@@ -2069,10 +1956,10 @@ dht_selfheal_directory(call_frame_t *frame, dht_selfheal_dir_cbk_t dir_cbk,
loc_t *loc, dht_layout_t *layout)
{
dht_local_t *local = NULL;
+ xlator_t *this = NULL;
uint32_t down = 0;
uint32_t misc = 0;
int ret = 0;
- xlator_t *this = NULL;
char pgfid[GF_UUID_BUF_SIZE] = {0};
char gfid[GF_UUID_BUF_SIZE] = {0};
inode_t *linked_inode = NULL, *inode = NULL;
@@ -2083,6 +1970,20 @@ dht_selfheal_directory(call_frame_t *frame, dht_selfheal_dir_cbk_t dir_cbk,
local->selfheal.dir_cbk = dir_cbk;
local->selfheal.layout = dht_layout_ref(this, layout);
+ if (local->need_attrheal) {
+ if (__is_root_gfid(local->stbuf.ia_gfid)) {
+ local->stbuf.ia_gid = local->prebuf.ia_gid;
+ local->stbuf.ia_uid = local->prebuf.ia_uid;
+
+ local->stbuf.ia_ctime = local->prebuf.ia_ctime;
+ local->stbuf.ia_ctime_nsec = local->prebuf.ia_ctime_nsec;
+ local->stbuf.ia_prot = local->prebuf.ia_prot;
+
+ } else if (!IA_ISINVAL(local->mds_stbuf.ia_type)) {
+ local->stbuf = local->mds_stbuf;
+ }
+ }
+
if (!__is_root_gfid(local->stbuf.ia_gfid)) {
gf_uuid_unparse(local->stbuf.ia_gfid, gfid);
gf_uuid_unparse(loc->parent->gfid, pgfid);
@@ -2090,9 +1991,9 @@ dht_selfheal_directory(call_frame_t *frame, dht_selfheal_dir_cbk_t dir_cbk,
linked_inode = inode_link(loc->inode, loc->parent, loc->name,
&local->stbuf);
if (!linked_inode) {
- gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DIR_SELFHEAL_FAILED,
- "linking inode failed (%s/%s) => %s", pgfid, loc->name,
- gfid);
+ gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_LINK_INODE_FAILED,
+ "pgfid=%s", pgfid, "name=%s", loc->name, "gfid=%s", gfid,
+ NULL);
ret = 0;
goto sorry_no_fix;
}
@@ -2102,6 +2003,13 @@ dht_selfheal_directory(call_frame_t *frame, dht_selfheal_dir_cbk_t dir_cbk,
inode_unref(inode);
}
+ if (local->need_xattr_heal && (local->mds_xattr)) {
+ dht_dir_set_heal_xattr(this, local, local->xattr, local->mds_xattr,
+ NULL, NULL);
+ dict_unref(local->mds_xattr);
+ local->mds_xattr = NULL;
+ }
+
dht_layout_anomalies(this, loc, layout, &local->selfheal.hole_cnt,
&local->selfheal.overlaps_cnt,
&local->selfheal.missing_cnt, &local->selfheal.down,
@@ -2111,19 +2019,17 @@ dht_selfheal_directory(call_frame_t *frame, dht_selfheal_dir_cbk_t dir_cbk,
misc = local->selfheal.misc;
if (down) {
- gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DIR_SELFHEAL_FAILED,
- "Directory selfheal failed: %d subvolumes down."
- "Not fixing. path = %s, gfid = %s",
- down, loc->path, gfid);
+ gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_SELFHEAL_FAILED,
+ "path=%s", loc->path, "subvol-down=%d", down, "Not-fixing",
+ "gfid=%s", gfid, NULL);
ret = 0;
goto sorry_no_fix;
}
if (misc) {
- gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DIR_SELFHEAL_FAILED,
- "Directory selfheal failed : %d subvolumes "
- "have unrecoverable errors. path = %s, gfid = %s",
- misc, loc->path, gfid);
+ gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_SELFHEAL_FAILED,
+ "path=%s", loc->path, "misc=%d", misc, "unrecoverable-errors",
+ "gfid=%s", gfid, NULL);
ret = 0;
goto sorry_no_fix;
@@ -2209,29 +2115,28 @@ dht_dir_heal_xattrs(void *data)
gf_uuid_unparse(local->loc.gfid, gfid);
if (!mds_subvol) {
- gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DIR_XATTR_HEAL_FAILED,
- "No mds subvol for %s gfid = %s", local->loc.path, gfid);
+ gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_NO_MDS_SUBVOL, "path=%s",
+ local->loc.path, "gfid=%s", gfid, NULL);
goto out;
}
if ((local->loc.inode && gf_uuid_is_null(local->loc.inode->gfid)) ||
gf_uuid_is_null(local->loc.gfid)) {
- gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DIR_XATTR_HEAL_FAILED,
- "No gfid present so skip heal for path %s gfid = %s",
- local->loc.path, gfid);
+ gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_GFID_NOT_PRESENT,
+ "skip-heal path=%s", local->loc.path, "gfid=%s", gfid, NULL);
goto out;
}
internal_xattr = dict_new();
if (!internal_xattr) {
- gf_msg(this->name, GF_LOG_ERROR, DHT_MSG_NO_MEMORY, 0,
- "dictionary creation failed");
+ gf_smsg(this->name, GF_LOG_ERROR, 0, DHT_MSG_CREATE_FAILED,
+ "dictionary", NULL);
goto out;
}
xdata = dict_new();
if (!xdata) {
- gf_msg(this->name, GF_LOG_ERROR, DHT_MSG_NO_MEMORY, 0,
- "dictionary creation failed");
+ gf_smsg(this->name, GF_LOG_ERROR, 0, DHT_MSG_CREATE_FAILED,
+ "dictionary", NULL);
goto out;
}
@@ -2239,18 +2144,17 @@ dht_dir_heal_xattrs(void *data)
user_xattr = dict_new();
if (!user_xattr) {
- gf_msg(this->name, GF_LOG_ERROR, DHT_MSG_NO_MEMORY, 0,
- "dictionary creation failed");
+ gf_smsg(this->name, GF_LOG_ERROR, 0, DHT_MSG_CREATE_FAILED,
+ "dictionary", NULL);
goto out;
}
ret = syncop_listxattr(local->mds_subvol, &local->loc, &mds_xattr, NULL,
NULL);
if (ret < 0) {
- gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_DIR_XATTR_HEAL_FAILED,
- "failed to list xattrs for "
- "%s: on %s ",
- local->loc.path, local->mds_subvol->name);
+ gf_smsg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_LIST_XATTRS_FAILED,
+ "path=%s", local->loc.path, "name=%s", local->mds_subvol->name,
+ NULL);
}
if (!mds_xattr)
@@ -2265,10 +2169,9 @@ dht_dir_heal_xattrs(void *data)
dict_get(user_xattr, QUOTA_LIMIT_OBJECTS_KEY)) {
ret = dict_set_int32(xdata, GLUSTERFS_INTERNAL_FOP_KEY, 1);
if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED,
- "Failed to set dictionary value: key = %s,"
- " path = %s",
- GLUSTERFS_INTERNAL_FOP_KEY, local->loc.path);
+ gf_smsg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED,
+ "key=%s", GLUSTERFS_INTERNAL_FOP_KEY, "path=%s",
+ local->loc.path, NULL);
goto out;
}
}
@@ -2280,16 +2183,25 @@ dht_dir_heal_xattrs(void *data)
if (subvol == mds_subvol)
continue;
if (uret || uflag) {
+ /* Custom xattr heal is required - let posix handle it */
+ ret = dict_set_int8(xdata, "sync_backend_xattrs", _gf_true);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DICT_SET_FAILED,
+ "path=%s", local->loc.path, "key=%s",
+ "sync_backend_xattrs", NULL);
+ goto out;
+ }
+
ret = syncop_setxattr(subvol, &local->loc, user_xattr, 0, xdata,
NULL);
if (ret) {
xattr_hashed = 1;
- gf_msg(this->name, GF_LOG_ERROR, -ret,
- DHT_MSG_DIR_XATTR_HEAL_FAILED,
- "Directory xattr heal failed. Failed to set"
- "user xattr on path %s on "
- "subvol %s, gfid = %s ",
- local->loc.path, subvol->name, gfid);
+ gf_smsg(this->name, GF_LOG_ERROR, -ret,
+ DHT_MSG_DIR_XATTR_HEAL_FAILED,
+ "set-user-xattr-failed path=%s", local->loc.path,
+ "subvol=%s", subvol->name, "gfid=%s", gfid, NULL);
+ } else {
+ dict_del(xdata, "sync_backend_xattrs");
}
}
}
@@ -2298,21 +2210,17 @@ dht_dir_heal_xattrs(void *data)
ret = dht_dict_set_array(internal_xattr, conf->mds_xattr_key, allzero,
1);
if (ret) {
- gf_msg(this->name, GF_LOG_WARNING, ENOMEM, DHT_MSG_DICT_SET_FAILED,
- "Failed to set dictionary value:key = %s for "
- "path %s",
- conf->mds_xattr_key, local->loc.path);
+ gf_smsg(this->name, GF_LOG_WARNING, ENOMEM, DHT_MSG_DICT_SET_FAILED,
+ "key=%s", conf->mds_xattr_key, "path=%s", local->loc.path,
+ NULL);
goto out;
}
ret = syncop_setxattr(mds_subvol, &local->loc, internal_xattr, 0, NULL,
NULL);
if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, -ret,
- DHT_MSG_DIR_XATTR_HEAL_FAILED,
- "Failed to reset internal xattr "
- "on path %s on subvol %s"
- "gfid = %s ",
- local->loc.path, mds_subvol->name, gfid);
+ gf_smsg(this->name, GF_LOG_ERROR, -ret,
+ DHT_MSG_DIR_XATTR_HEAL_FAILED, "path=%s", local->loc.path,
+ "subvol=%s", mds_subvol->name, "gfid=%s", gfid, NULL);
}
}
@@ -2353,18 +2261,18 @@ dht_dir_attr_heal(void *data)
frame = data;
local = frame->local;
- mds_subvol = local->mds_subvol;
this = frame->this;
GF_VALIDATE_OR_GOTO("dht", this, out);
GF_VALIDATE_OR_GOTO("dht", local, out);
conf = this->private;
GF_VALIDATE_OR_GOTO("dht", conf, out);
+ mds_subvol = local->mds_subvol;
call_cnt = conf->subvolume_cnt;
if (!__is_root_gfid(local->stbuf.ia_gfid) && (!mds_subvol)) {
- gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DIR_ATTR_HEAL_FAILED,
- "No mds subvol for %s gfid = %s", local->loc.path, gfid);
+ gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_NO_MDS_SUBVOL, "path=%s",
+ local->loc.path, "gfid=%s", gfid, NULL);
goto out;
}
@@ -2372,11 +2280,9 @@ dht_dir_attr_heal(void *data)
for (i = 0; i < conf->subvolume_cnt; i++) {
if (conf->subvolumes[i] == mds_subvol) {
if (!conf->subvolume_status[i]) {
- gf_msg(this->name, GF_LOG_ERROR, 0,
- DHT_MSG_HASHED_SUBVOL_DOWN,
- "mds subvol is down for path "
- " %s gfid is %s Unable to set xattr ",
- local->loc.path, gfid);
+ gf_smsg(this->name, GF_LOG_ERROR, 0,
+ DHT_MSG_MDS_DOWN_UNABLE_TO_SET, "path=%s",
+ local->loc.path, "gfid=%s", gfid, NULL);
goto out;
}
}
@@ -2402,10 +2308,9 @@ dht_dir_attr_heal(void *data)
if (ret) {
gf_uuid_unparse(local->loc.gfid, gfid);
- gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_DIR_ATTR_HEAL_FAILED,
- "Directory attr heal failed. Failed to set"
- " uid/gid on path %s on subvol %s, gfid = %s ",
- local->loc.path, subvol->name, gfid);
+ gf_smsg(this->name, GF_LOG_ERROR, -ret,
+ DHT_MSG_DIR_ATTR_HEAL_FAILED, "path=%s", local->loc.path,
+ "subvol=%s", subvol->name, "gfid=%s", gfid, NULL);
}
}
out:
@@ -2420,7 +2325,7 @@ dht_dir_attr_heal_done(int ret, call_frame_t *sync_frame, void *data)
}
/* EXIT: dht_update_commit_hash_for_layout */
-int
+static int
dht_update_commit_hash_for_layout_done(call_frame_t *frame, void *cookie,
xlator_t *this, int32_t op_ret,
int32_t op_errno, dict_t *xdata)
@@ -2440,7 +2345,7 @@ dht_update_commit_hash_for_layout_done(call_frame_t *frame, void *cookie,
return 0;
}
-int
+static int
dht_update_commit_hash_for_layout_unlock(call_frame_t *frame, xlator_t *this)
{
dht_local_t *local = NULL;
@@ -2458,11 +2363,8 @@ dht_update_commit_hash_for_layout_unlock(call_frame_t *frame, xlator_t *this)
local->op_ret = -1;
}
- gf_msg(this->name, GF_LOG_WARNING, errno,
- DHT_MSG_DIR_SELFHEAL_XATTR_FAILED,
- "Winding unlock failed: stale locks left on brick"
- " %s",
- local->loc.path);
+ gf_smsg(this->name, GF_LOG_WARNING, errno, DHT_MSG_WIND_UNLOCK_FAILED,
+ "path=%s", local->loc.path, NULL);
dht_update_commit_hash_for_layout_done(frame, NULL, this, 0, 0, NULL);
}
@@ -2470,7 +2372,7 @@ dht_update_commit_hash_for_layout_unlock(call_frame_t *frame, xlator_t *this)
return 0;
}
-int
+static int
dht_update_commit_hash_for_layout_cbk(call_frame_t *frame, void *cookie,
xlator_t *this, int op_ret, int op_errno,
dict_t *xdata)
@@ -2497,7 +2399,7 @@ dht_update_commit_hash_for_layout_cbk(call_frame_t *frame, void *cookie,
return 0;
}
-int
+static int
dht_update_commit_hash_for_layout_resume(call_frame_t *frame, void *cookie,
xlator_t *this, int32_t op_ret,
int32_t op_errno, dict_t *xdata)
@@ -2525,11 +2427,8 @@ dht_update_commit_hash_for_layout_resume(call_frame_t *frame, void *cookie,
if (!xattr) {
local->op_errno = errno;
- gf_msg(this->name, GF_LOG_WARNING, errno,
- DHT_MSG_DIR_SELFHEAL_XATTR_FAILED,
- "Directory commit hash update failed:"
- " %s: Allocation failed",
- local->loc.path);
+ gf_smsg(this->name, GF_LOG_WARNING, errno, DHT_MSG_COMMIT_HASH_FAILED,
+ "allocation-failed path=%s", local->loc.path, NULL);
goto err;
}
@@ -2540,11 +2439,10 @@ dht_update_commit_hash_for_layout_resume(call_frame_t *frame, void *cookie,
if (ret < 0) {
local->op_errno = ENOENT;
- gf_msg(this->name, GF_LOG_WARNING, 0,
- DHT_MSG_DIR_SELFHEAL_XATTR_FAILED,
- "Directory commit hash update failed:"
- " %s: (subvol %s) Failed to find disk layout",
- local->loc.path, conf->local_subvols[i]->name);
+ gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_COMMIT_HASH_FAILED,
+ "path=%s", local->loc.path, "subvol=%s",
+ conf->local_subvols[i]->name, "find-disk-layout-failed",
+ NULL);
goto err;
}
@@ -2558,12 +2456,10 @@ dht_update_commit_hash_for_layout_resume(call_frame_t *frame, void *cookie,
if (ret == -1) {
local->op_errno = errno;
- gf_msg(this->name, GF_LOG_WARNING, errno,
- DHT_MSG_DIR_SELFHEAL_XATTR_FAILED,
- "Directory commit hash update failed:"
- " %s: (subvol %s) Failed to extract disk"
- " layout",
- local->loc.path, conf->local_subvols[i]->name);
+ gf_smsg(this->name, GF_LOG_WARNING, errno,
+ DHT_MSG_COMMIT_HASH_FAILED, "path=%s", local->loc.path,
+ "subvol=%s", conf->local_subvols[i]->name,
+ "extract-disk-layout-failed", NULL);
goto err;
}
@@ -2572,11 +2468,9 @@ dht_update_commit_hash_for_layout_resume(call_frame_t *frame, void *cookie,
if (!xattr[i]) {
local->op_errno = errno;
- gf_msg(this->name, GF_LOG_WARNING, errno,
- DHT_MSG_DIR_SELFHEAL_XATTR_FAILED,
- "Directory commit hash update failed:"
- " %s: Allocation failed",
- local->loc.path);
+ gf_smsg(this->name, GF_LOG_WARNING, errno,
+ DHT_MSG_COMMIT_HASH_FAILED, "path=%s Allocation-failed",
+ local->loc.path, NULL);
goto err;
}
@@ -2585,12 +2479,10 @@ dht_update_commit_hash_for_layout_resume(call_frame_t *frame, void *cookie,
if (ret != 0) {
local->op_errno = ENOMEM;
- gf_msg(this->name, GF_LOG_WARNING, 0,
- DHT_MSG_DIR_SELFHEAL_XATTR_FAILED,
- "Directory self heal xattr failed:"
- "%s: (subvol %s) Failed to set xattr"
- " dictionary,",
- local->loc.path, conf->local_subvols[i]->name);
+ gf_smsg(this->name, GF_LOG_WARNING, 0,
+ DHT_MSG_DIR_SELFHEAL_XATTR_FAILED, "path=%s",
+ local->loc.path, "subvol=%s", conf->local_subvols[i]->name,
+ "set-xattr-failed", NULL);
goto err;
}
diff --git a/xlators/cluster/dht/src/dht-shared.c b/xlators/cluster/dht/src/dht-shared.c
index 0a7aa15e242..bb72b0ffbb5 100644
--- a/xlators/cluster/dht/src/dht-shared.c
+++ b/xlators/cluster/dht/src/dht-shared.c
@@ -9,7 +9,7 @@
*/
/* TODO: add NS locking */
-#include "statedump.h"
+#include <glusterfs/statedump.h>
#include "dht-common.h"
#include "dht-messages.h"
@@ -17,24 +17,6 @@
#define MAX(a, b) (((a) > (b)) ? (a) : (b))
#endif
-#define GF_DECIDE_DEFRAG_THROTTLE_COUNT(throttle_count, conf) \
- { \
- pthread_mutex_lock(&conf->defrag->dfq_mutex); \
- \
- if (!strcasecmp(conf->dthrottle, "lazy")) \
- conf->defrag->recon_thread_count = 1; \
- \
- throttle_count = MAX((sysconf(_SC_NPROCESSORS_ONLN) - 4), 4); \
- \
- if (!strcasecmp(conf->dthrottle, "normal")) \
- conf->defrag->recon_thread_count = (throttle_count / 2); \
- \
- if (!strcasecmp(conf->dthrottle, "aggressive")) \
- conf->defrag->recon_thread_count = throttle_count; \
- \
- pthread_mutex_unlock(&conf->defrag->dfq_mutex); \
- }
-
/* TODO:
- use volumename in xattr instead of "dht"
- use NS locks
@@ -42,9 +24,7 @@
- complete linkfile selfheal
*/
-extern dht_methods_t dht_methods;
-
-void
+static void
dht_layout_dump(dht_layout_t *layout, const char *prefix)
{
char key[GF_DUMP_MAX_BUF_LEN];
@@ -52,8 +32,6 @@ dht_layout_dump(dht_layout_t *layout, const char *prefix)
if (!layout)
goto out;
- if (!prefix)
- goto out;
gf_proc_dump_build_key(key, prefix, "cnt");
gf_proc_dump_write(key, "%d", layout->cnt);
@@ -73,9 +51,9 @@ dht_layout_dump(dht_layout_t *layout, const char *prefix)
gf_proc_dump_build_key(key, prefix, "list[%d].err", i);
gf_proc_dump_write(key, "%d", layout->list[i].err);
gf_proc_dump_build_key(key, prefix, "list[%d].start", i);
- gf_proc_dump_write(key, "%u", layout->list[i].start);
+ gf_proc_dump_write(key, "0x%x", layout->list[i].start);
gf_proc_dump_build_key(key, prefix, "list[%d].stop", i);
- gf_proc_dump_write(key, "%u", layout->list[i].stop);
+ gf_proc_dump_write(key, "0x%x", layout->list[i].stop);
if (layout->list[i].xlator) {
gf_proc_dump_build_key(key, prefix, "list[%d].xlator.type", i);
gf_proc_dump_write(key, "%s", layout->list[i].xlator->type);
@@ -152,7 +130,7 @@ dht_priv_dump(xlator_t *this)
gf_proc_dump_write(key, "%lf", conf->du_stats[i].avail_percent);
snprintf(key, sizeof(key), "du_stats[%d].avail_space", i);
- gf_proc_dump_write(key, "%lu", conf->du_stats[i].avail_space);
+ gf_proc_dump_write(key, "%" PRIu64, conf->du_stats[i].avail_space);
snprintf(key, sizeof(key), "du_stats[%d].avail_inodes", i);
gf_proc_dump_write(key, "%lf", conf->du_stats[i].avail_inodes);
@@ -162,9 +140,9 @@ dht_priv_dump(xlator_t *this)
}
}
- if (conf->last_stat_fetch.tv_sec)
+ if (conf->last_stat_fetch)
gf_proc_dump_write("last_stat_fetch", "%s",
- ctime(&conf->last_stat_fetch.tv_sec));
+ ctime(&conf->last_stat_fetch));
UNLOCK(&conf->subvolume_lock);
@@ -264,7 +242,7 @@ out:
return ret;
}
-int
+static int
dht_parse_decommissioned_bricks(xlator_t *this, dht_conf_t *conf,
const char *bricks)
{
@@ -278,6 +256,10 @@ dht_parse_decommissioned_bricks(xlator_t *this, dht_conf_t *conf,
goto out;
dup_brick = gf_strdup(bricks);
+ if (dup_brick == NULL) {
+ goto out;
+ }
+
node = strtok_r(dup_brick, ",", &tmpstr);
while (node) {
for (i = 0; i < conf->subvolume_cnt; i++) {
@@ -306,14 +288,10 @@ out:
return ret;
}
-int
+static void
dht_decommissioned_remove(xlator_t *this, dht_conf_t *conf)
{
int i = 0;
- int ret = -1;
-
- if (!conf)
- goto out;
for (i = 0; i < conf->subvolume_cnt; i++) {
if (conf->decommissioned_bricks[i]) {
@@ -321,13 +299,9 @@ dht_decommissioned_remove(xlator_t *this, dht_conf_t *conf)
conf->decommission_subvols_cnt--;
}
}
-
- ret = 0;
-out:
-
- return ret;
}
-void
+
+static void
dht_init_regex(xlator_t *this, dict_t *odict, char *name, regex_t *re,
gf_boolean_t *re_valid, dht_conf_t *conf)
{
@@ -384,7 +358,7 @@ out:
return ret;
}
-int
+static int
dht_configure_throttle(xlator_t *this, dht_conf_t *conf, char *temp_str)
{
int rebal_thread_count = 0;
@@ -401,18 +375,20 @@ dht_configure_throttle(xlator_t *this, dht_conf_t *conf, char *temp_str)
} else if ((gf_string2int(temp_str, &rebal_thread_count) == 0)) {
if ((rebal_thread_count > 0) &&
(rebal_thread_count <= MAX_REBAL_THREADS)) {
+ conf->defrag->recon_thread_count = rebal_thread_count;
+ pthread_mutex_unlock(&conf->defrag->dfq_mutex);
gf_msg(this->name, GF_LOG_INFO, 0, 0,
"rebal thread count configured to %d",
rebal_thread_count);
- conf->defrag->recon_thread_count = rebal_thread_count;
+ goto out;
} else {
+ pthread_mutex_unlock(&conf->defrag->dfq_mutex);
gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_INVALID_OPTION,
"Invalid option: Reconfigure: "
"rebal-throttle should be "
"within range of 0 and maximum number of"
" cores available");
ret = -1;
- pthread_mutex_unlock(&conf->defrag->dfq_mutex);
goto out;
}
} else {
@@ -521,9 +497,7 @@ dht_reconfigure(xlator_t *this, dict_t *options)
if (ret == -1)
goto out;
} else {
- ret = dht_decommissioned_remove(this, conf);
- if (ret == -1)
- goto out;
+ dht_decommissioned_remove(this, conf);
}
dht_init_regex(this, options, "rsync-hash-regex", &conf->rsync_regex,
@@ -563,6 +537,8 @@ gf_defrag_pattern_list_fill(xlator_t *this, gf_defrag_info_t *defrag,
pattern_str = strtok_r(data, ",", &tmp_str);
while (pattern_str) {
dup_str = gf_strdup(pattern_str);
+ if (!dup_str)
+ goto out;
pattern_list = GF_CALLOC(1, sizeof(gf_defrag_pattern_list_t), 1);
if (!pattern_list) {
goto out;
@@ -609,7 +585,7 @@ out:
return ret;
}
-int
+static int
dht_init_methods(xlator_t *this)
{
int ret = -1;
@@ -622,7 +598,6 @@ dht_init_methods(xlator_t *this)
methods = &(conf->methods);
methods->migration_get_dst_subvol = dht_migration_get_dst_subvol;
- methods->migration_needed = dht_migration_needed;
methods->migration_other = NULL;
methods->layout_search = dht_layout_search;
@@ -1071,84 +1046,6 @@ struct volume_options dht_options[] = {
/* NUFA option */
{.key = {"local-volume-name"}, .type = GF_OPTION_TYPE_XLATOR},
- /* tier options */
- {
- .key = {"tier-pause"},
- .type = GF_OPTION_TYPE_BOOL,
- .default_value = "off",
- },
-
- {
- .key = {"tier-promote-frequency"},
- .type = GF_OPTION_TYPE_INT,
- .default_value = "120",
- },
-
- {
- .key = {"tier-demote-frequency"},
- .type = GF_OPTION_TYPE_INT,
- .default_value = "3600",
- },
-
- {
- .key = {"write-freq-threshold"},
- .type = GF_OPTION_TYPE_INT,
- .default_value = "0",
- },
-
- {
- .key = {"read-freq-threshold"},
- .type = GF_OPTION_TYPE_INT,
- .default_value = "0",
- },
- {
- .key = {"watermark-hi"},
- .type = GF_OPTION_TYPE_PERCENT,
- .default_value = "90",
- },
- {
- .key = {"watermark-low"},
- .type = GF_OPTION_TYPE_PERCENT,
- .default_value = "75",
- },
- {
- .key = {"tier-mode"},
- .type = GF_OPTION_TYPE_STR,
- .default_value = "test",
- },
- {
- .key = {"tier-compact"},
- .type = GF_OPTION_TYPE_BOOL,
- .default_value = "off",
- },
- {.key = {"tier-hot-compact-frequency"},
- .type = GF_OPTION_TYPE_INT,
- .default_value = "604800",
- .description = "Frequency to compact DBs on hot tier in system"},
- {.key = {"tier-cold-compact-frequency"},
- .type = GF_OPTION_TYPE_INT,
- .default_value = "604800",
- .description = "Frequency to compact DBs on cold tier in system"},
- {
- .key = {"tier-max-mb"},
- .type = GF_OPTION_TYPE_INT,
- .default_value = "4000",
- },
- {
- .key = {"tier-max-promote-file-size"},
- .type = GF_OPTION_TYPE_INT,
- .default_value = "0",
- },
- {
- .key = {"tier-max-files"},
- .type = GF_OPTION_TYPE_INT,
- .default_value = "10000",
- },
- {
- .key = {"tier-query-limit"},
- .type = GF_OPTION_TYPE_INT,
- .default_value = "100",
- },
/* switch option */
{.key = {"pattern.switch.case"}, .type = GF_OPTION_TYPE_ANY},
diff --git a/xlators/cluster/dht/src/dht.c b/xlators/cluster/dht/src/dht.c
index 63ab926ca58..53de8292704 100644
--- a/xlators/cluster/dht/src/dht.c
+++ b/xlators/cluster/dht/src/dht.c
@@ -8,7 +8,6 @@
cases as published by the Free Software Foundation.
*/
-#include "statedump.h"
#include "dht-common.h"
struct xlator_fops dht_pt_fops = {
diff --git a/xlators/cluster/dht/src/dht.sym b/xlators/cluster/dht/src/dht.sym
deleted file mode 100644
index 24241a91baf..00000000000
--- a/xlators/cluster/dht/src/dht.sym
+++ /dev/null
@@ -1,9 +0,0 @@
-xlator_api
-fops
-cbks
-class_methods
-dht_methods
-options
-mem_acct_init
-reconfigure
-dumpops
diff --git a/xlators/cluster/dht/src/nufa.c b/xlators/cluster/dht/src/nufa.c
index 558611384fe..3648a564840 100644
--- a/xlators/cluster/dht/src/nufa.c
+++ b/xlators/cluster/dht/src/nufa.c
@@ -595,15 +595,9 @@ nufa_init(xlator_t *this)
dht_methods_t dht_methods = {
.migration_get_dst_subvol = dht_migration_get_dst_subvol,
- .migration_needed = dht_migration_needed,
.layout_search = dht_layout_search,
};
-class_methods_t class_methods = {.init = nufa_init,
- .fini = dht_fini,
- .reconfigure = dht_reconfigure,
- .notify = dht_notify};
-
struct xlator_fops fops = {
.lookup = nufa_lookup,
.create = nufa_create,
@@ -645,3 +639,19 @@ struct xlator_fops fops = {
};
struct xlator_cbks cbks = {.forget = dht_forget};
+extern int32_t
+mem_acct_init(xlator_t *this);
+
+xlator_api_t xlator_api = {
+ .init = nufa_init,
+ .fini = dht_fini,
+ .notify = dht_notify,
+ .reconfigure = dht_reconfigure,
+ .mem_acct_init = mem_acct_init,
+ .op_version = {1}, /* Present from the initial version */
+ .fops = &fops,
+ .cbks = &cbks,
+ .options = dht_options,
+ .identifier = "nufa",
+ .category = GF_TECH_PREVIEW,
+};
diff --git a/xlators/cluster/dht/src/nufa.sym b/xlators/cluster/dht/src/nufa.sym
deleted file mode 100644
index 780b5fc0387..00000000000
--- a/xlators/cluster/dht/src/nufa.sym
+++ /dev/null
@@ -1,8 +0,0 @@
-fops
-cbks
-class_methods
-dht_methods
-options
-mem_acct_init
-reconfigure
-dumpops
diff --git a/xlators/cluster/dht/src/switch.c b/xlators/cluster/dht/src/switch.c
index a3c384b0f5c..207d109a025 100644
--- a/xlators/cluster/dht/src/switch.c
+++ b/xlators/cluster/dht/src/switch.c
@@ -610,9 +610,15 @@ set_switch_pattern(xlator_t *this, dht_conf_t *conf, const char *pattern_str)
/* Get the pattern for considering switch case.
"option block-size *avi:10MB" etc */
option_string = gf_strdup(pattern_str);
+ if (option_string == NULL) {
+ goto err;
+ }
switch_str = strtok_r(option_string, ";", &tmp_str);
while (switch_str) {
dup_str = gf_strdup(switch_str);
+ if (dup_str == NULL) {
+ goto err;
+ }
switch_opt = GF_CALLOC(1, sizeof(struct switch_struct),
gf_switch_mt_switch_struct);
if (!switch_opt) {
@@ -647,6 +653,9 @@ set_switch_pattern(xlator_t *this, dht_conf_t *conf, const char *pattern_str)
if (childs) {
dup_childs = gf_strdup(childs);
+ if (dup_childs == NULL) {
+ goto err;
+ }
child = strtok_r(dup_childs, ",", &tmp);
while (child) {
if (gf_switch_valid_child(this, child)) {
@@ -823,11 +832,6 @@ err:
return -1;
}
-class_methods_t class_methods = {.init = switch_init,
- .fini = switch_fini,
- .reconfigure = dht_reconfigure,
- .notify = dht_notify};
-
struct xlator_fops fops = {
.lookup = switch_lookup,
.create = switch_create,
@@ -869,3 +873,19 @@ struct xlator_fops fops = {
};
struct xlator_cbks cbks = {.forget = dht_forget};
+extern int32_t
+mem_acct_init(xlator_t *this);
+
+xlator_api_t xlator_api = {
+ .init = switch_init,
+ .fini = switch_fini,
+ .notify = dht_notify,
+ .reconfigure = dht_reconfigure,
+ .mem_acct_init = mem_acct_init,
+ .op_version = {1}, /* Present from the initial version */
+ .fops = &fops,
+ .cbks = &cbks,
+ .options = dht_options,
+ .identifier = "switch",
+ .category = GF_TECH_PREVIEW,
+};
diff --git a/xlators/cluster/dht/src/switch.sym b/xlators/cluster/dht/src/switch.sym
deleted file mode 100644
index 780b5fc0387..00000000000
--- a/xlators/cluster/dht/src/switch.sym
+++ /dev/null
@@ -1,8 +0,0 @@
-fops
-cbks
-class_methods
-dht_methods
-options
-mem_acct_init
-reconfigure
-dumpops
diff --git a/xlators/cluster/dht/src/tier-common.c b/xlators/cluster/dht/src/tier-common.c
deleted file mode 100644
index b86ed673042..00000000000
--- a/xlators/cluster/dht/src/tier-common.c
+++ /dev/null
@@ -1,1199 +0,0 @@
-/*
- Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-#include "glusterfs.h"
-#include "xlator.h"
-#include "libxlator.h"
-#include "dht-common.h"
-#include "defaults.h"
-#include "tier-common.h"
-#include "tier.h"
-
-int
-dht_link_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
- int op_errno, inode_t *inode, struct iatt *stbuf,
- struct iatt *preparent, struct iatt *postparent, dict_t *xdata);
-
-int
-tier_link_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
- int op_errno, inode_t *inode, struct iatt *stbuf,
- struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
-{
- dht_local_t *local = NULL;
- loc_t *oldloc = NULL;
- loc_t *newloc = NULL;
-
- local = frame->local;
-
- oldloc = &local->loc;
- newloc = &local->loc2;
-
- if (op_ret == -1) {
- /* No continuation on DHT inode missing errors, as we should
- * then have a good stbuf that states P2 happened. We would
- * get inode missing if, the file completed migrated between
- * the lookup and the link call */
- goto out;
- }
-
- if (local->call_cnt != 1) {
- goto out;
- }
-
- local->call_cnt = 2;
-
- /* Do this on the hot tier now */
-
- STACK_WIND(frame, tier_link_cbk, local->cached_subvol,
- local->cached_subvol->fops->link, oldloc, newloc, xdata);
-
- return 0;
-
-out:
- DHT_STRIP_PHASE1_FLAGS(stbuf);
-
- DHT_STACK_UNWIND(link, frame, op_ret, op_errno, inode, stbuf, preparent,
- postparent, NULL);
-
- return 0;
-}
-
-int
-tier_link(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
- dict_t *xdata)
-{
- xlator_t *cached_subvol = NULL;
- xlator_t *hashed_subvol = NULL;
- int op_errno = -1;
- int ret = -1;
- dht_local_t *local = NULL;
- dht_conf_t *conf = NULL;
-
- VALIDATE_OR_GOTO(frame, err);
- VALIDATE_OR_GOTO(this, err);
- VALIDATE_OR_GOTO(oldloc, err);
- VALIDATE_OR_GOTO(newloc, err);
-
- conf = this->private;
-
- local = dht_local_init(frame, oldloc, NULL, GF_FOP_LINK);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
- local->call_cnt = 1;
-
- cached_subvol = local->cached_subvol;
-
- if (!cached_subvol) {
- gf_msg_debug(this->name, 0, "no cached subvolume for path=%s",
- oldloc->path);
- op_errno = ENOENT;
- goto err;
- }
-
- hashed_subvol = TIER_HASHED_SUBVOL;
-
- ret = loc_copy(&local->loc2, newloc);
- if (ret == -1) {
- op_errno = ENOMEM;
- goto err;
- }
-
- if (hashed_subvol == cached_subvol) {
- STACK_WIND(frame, dht_link_cbk, cached_subvol,
- cached_subvol->fops->link, oldloc, newloc, xdata);
- return 0;
- }
-
- /* Create hardlinks to both the data file on the hot tier
- and the linkto file on the cold tier */
-
- gf_uuid_copy(local->gfid, oldloc->inode->gfid);
-
- STACK_WIND(frame, tier_link_cbk, hashed_subvol, hashed_subvol->fops->link,
- oldloc, newloc, xdata);
-
- return 0;
-err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND(link, frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL);
- return 0;
-}
-
-int
-tier_create_unlink_stale_linkto_cbk(call_frame_t *frame, void *cookie,
- xlator_t *this, int op_ret, int op_errno,
- struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
-{
- dht_local_t *local = NULL;
-
- local = frame->local;
-
- if (local->params) {
- dict_del(local->params, GLUSTERFS_INTERNAL_FOP_KEY);
- }
-
- DHT_STACK_UNWIND(create, frame, -1, local->op_errno, NULL, NULL, NULL, NULL,
- NULL, NULL);
-
- return 0;
-}
-
-int
-tier_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
- int op_errno, fd_t *fd, inode_t *inode, struct iatt *stbuf,
- struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
-{
- xlator_t *prev = NULL;
- int ret = -1;
- dht_local_t *local = NULL;
- xlator_t *hashed_subvol = NULL;
- dht_conf_t *conf = NULL;
-
- local = frame->local;
- conf = this->private;
-
- hashed_subvol = TIER_HASHED_SUBVOL;
-
- if (!local) {
- op_ret = -1;
- op_errno = EINVAL;
- goto out;
- }
-
- if (op_ret == -1) {
- if (local->linked == _gf_true && local->xattr_req) {
- local->op_errno = op_errno;
- local->op_ret = op_ret;
- ret = dht_fill_dict_to_avoid_unlink_of_migrating_file(
- local->xattr_req);
- if (ret) {
- gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DICT_SET_FAILED,
- "Failed to set dictionary value to "
- "unlink of migrating file");
- goto out;
- }
-
- STACK_WIND(frame, tier_create_unlink_stale_linkto_cbk,
- hashed_subvol, hashed_subvol->fops->unlink, &local->loc,
- 0, local->xattr_req);
- return 0;
- }
- goto out;
- }
-
- prev = cookie;
-
- if (local->loc.parent) {
- dht_inode_ctx_time_update(local->loc.parent, this, preparent, 0);
-
- dht_inode_ctx_time_update(local->loc.parent, this, postparent, 1);
- }
-
- ret = dht_layout_preset(this, prev, inode);
- if (ret != 0) {
- gf_msg_debug(this->name, 0, "could not set preset layout for subvol %s",
- prev->name);
- op_ret = -1;
- op_errno = EINVAL;
- goto out;
- }
-
- local->op_errno = op_errno;
-
- if (local->linked == _gf_true) {
- local->stbuf = *stbuf;
- dht_linkfile_attr_heal(frame, this);
- }
-out:
- if (local) {
- if (local->xattr_req) {
- dict_del(local->xattr_req, TIER_LINKFILE_GFID);
- }
- }
-
- DHT_STRIP_PHASE1_FLAGS(stbuf);
-
- DHT_STACK_UNWIND(create, frame, op_ret, op_errno, fd, inode, stbuf,
- preparent, postparent, xdata);
-
- return 0;
-}
-
-int
-tier_create_linkfile_create_cbk(call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret,
- int32_t op_errno, inode_t *inode,
- struct iatt *stbuf, struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
-{
- dht_local_t *local = NULL;
- xlator_t *cached_subvol = NULL;
- dht_conf_t *conf = NULL;
- int ret = -1;
- unsigned char *gfid = NULL;
-
- local = frame->local;
- if (!local) {
- op_errno = EINVAL;
- goto err;
- }
-
- if (op_ret == -1) {
- local->op_errno = op_errno;
- goto err;
- }
-
- conf = this->private;
- if (!conf) {
- local->op_errno = EINVAL;
- op_errno = EINVAL;
- goto err;
- }
-
- cached_subvol = TIER_UNHASHED_SUBVOL;
-
- if (local->params) {
- dict_del(local->params, conf->link_xattr_name);
- dict_del(local->params, GLUSTERFS_INTERNAL_FOP_KEY);
- }
-
- /*
- * We will delete the linkfile if data file creation fails.
- * When deleting this stale linkfile, there is a possibility
- * for a race between this linkfile deletion and a stale
- * linkfile deletion triggered by another lookup from different
- * client.
- *
- * For eg:
- *
- * Client 1 Client 2
- *
- * 1 linkfile created for foo
- *
- * 2 data file creation failed
- *
- * 3 creating a file with same name
- *
- * 4 lookup before creation deleted
- * the linkfile created by client1
- * considering as a stale linkfile.
- *
- * 5 New linkfile created for foo
- * with different gfid.
- *
- * 6 Trigger linkfile deletion as
- * data file creation failed.
- *
- * 7 Linkfile deleted which is
- * created by client2.
- *
- * 8 Data file created.
- *
- * With this race, we will end up having a file in a non-hashed subvol
- * without a linkfile in hashed subvol.
- *
- * To avoid this, we store the gfid of linkfile created by client, So
- * If we delete the linkfile , we validate gfid of existing file with
- * stored value from posix layer.
- *
- * Storing this value in local->xattr_req as local->params was also used
- * to create the data file. During the linkfile deletion we will use
- * local->xattr_req dictionary.
- */
- if (!local->xattr_req) {
- local->xattr_req = dict_new();
- if (!local->xattr_req) {
- local->op_errno = ENOMEM;
- op_errno = ENOMEM;
- goto err;
- }
- }
-
- gfid = GF_MALLOC(sizeof(uuid_t), gf_common_mt_char);
- if (!gfid) {
- local->op_errno = ENOMEM;
- op_errno = ENOMEM;
- goto err;
- }
-
- gf_uuid_copy(gfid, stbuf->ia_gfid);
- ret = dict_set_dynptr(local->xattr_req, TIER_LINKFILE_GFID, gfid,
- sizeof(uuid_t));
- if (ret) {
- GF_FREE(gfid);
- gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DICT_SET_FAILED,
- "Failed to set dictionary value"
- " : key = %s",
- TIER_LINKFILE_GFID);
- }
-
- STACK_WIND_COOKIE(frame, tier_create_cbk, cached_subvol, cached_subvol,
- cached_subvol->fops->create, &local->loc, local->flags,
- local->mode, local->umask, local->fd, local->params);
-
- return 0;
-err:
- DHT_STACK_UNWIND(create, frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL,
- NULL);
- return 0;
-}
-
-gf_boolean_t
-tier_is_hot_tier_decommissioned(xlator_t *this)
-{
- dht_conf_t *conf = NULL;
- xlator_t *hot_tier = NULL;
- int i = 0;
-
- conf = this->private;
- hot_tier = conf->subvolumes[1];
-
- if (conf->decommission_subvols_cnt) {
- for (i = 0; i < conf->subvolume_cnt; i++) {
- if (conf->decommissioned_bricks[i] &&
- conf->decommissioned_bricks[i] == hot_tier)
- return _gf_true;
- }
- }
-
- return _gf_false;
-}
-
-int
-tier_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
- mode_t mode, mode_t umask, fd_t *fd, dict_t *params)
-{
- int op_errno = -1;
- dht_local_t *local = NULL;
- dht_conf_t *conf = NULL;
- xlator_t *hot_subvol = NULL;
- xlator_t *cold_subvol = NULL;
-
- VALIDATE_OR_GOTO(frame, err);
- VALIDATE_OR_GOTO(this, err);
- VALIDATE_OR_GOTO(loc, err);
-
- conf = this->private;
-
- dht_get_du_info(frame, this, loc);
-
- local = dht_local_init(frame, loc, fd, GF_FOP_CREATE);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
-
- cold_subvol = TIER_HASHED_SUBVOL;
- hot_subvol = TIER_UNHASHED_SUBVOL;
-
- if (conf->subvolumes[0] != cold_subvol) {
- hot_subvol = conf->subvolumes[0];
- }
- /*
- * if hot tier full, write to cold.
- * Also if hot tier is full, create in cold
- */
- if (dht_is_subvol_filled(this, hot_subvol) ||
- tier_is_hot_tier_decommissioned(this)) {
- gf_msg_debug(this->name, 0, "creating %s on %s", loc->path,
- cold_subvol->name);
-
- STACK_WIND_COOKIE(frame, tier_create_cbk, cold_subvol, cold_subvol,
- cold_subvol->fops->create, loc, flags, mode, umask,
- fd, params);
- } else {
- local->params = dict_ref(params);
- local->flags = flags;
- local->mode = mode;
- local->umask = umask;
- local->cached_subvol = hot_subvol;
- local->hashed_subvol = cold_subvol;
-
- gf_msg_debug(this->name, 0, "creating %s on %s (link at %s)", loc->path,
- hot_subvol->name, cold_subvol->name);
-
- dht_linkfile_create(frame, tier_create_linkfile_create_cbk, this,
- hot_subvol, cold_subvol, loc);
-
- goto out;
- }
-out:
- return 0;
-
-err:
-
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND(create, frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL,
- NULL);
-
- return 0;
-}
-
-int
-tier_unlink_nonhashed_linkfile_cbk(call_frame_t *frame, void *cookie,
- xlator_t *this, int op_ret, int op_errno,
- struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
-{
- dht_local_t *local = NULL;
- xlator_t *prev = NULL;
-
- local = frame->local;
- prev = cookie;
-
- LOCK(&frame->lock);
- {
- if ((op_ret == -1) && (op_errno != ENOENT)) {
- local->op_errno = op_errno;
- local->op_ret = op_ret;
- gf_msg_debug(this->name, op_errno,
- "Unlink link: subvolume %s"
- " returned -1",
- prev->name);
- goto unlock;
- }
-
- local->op_ret = 0;
- }
-unlock:
- UNLOCK(&frame->lock);
-
- if (local->op_ret == -1)
- goto err;
- DHT_STACK_UNWIND(unlink, frame, local->op_ret, local->op_errno,
- &local->preparent, &local->postparent, NULL);
-
- return 0;
-
-err:
- DHT_STACK_UNWIND(unlink, frame, -1, local->op_errno, NULL, NULL, NULL);
- return 0;
-}
-
-int
-tier_unlink_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, inode_t *inode,
- struct iatt *preparent, dict_t *xdata,
- struct iatt *postparent)
-{
- dht_local_t *local = NULL;
- xlator_t *prev = NULL;
- dht_conf_t *conf = NULL;
- xlator_t *hot_subvol = NULL;
-
- local = frame->local;
- prev = cookie;
- conf = this->private;
- hot_subvol = TIER_UNHASHED_SUBVOL;
-
- if (!op_ret) {
- /*
- * linkfile present on hot tier. unlinking the linkfile
- */
- STACK_WIND_COOKIE(frame, tier_unlink_nonhashed_linkfile_cbk, hot_subvol,
- hot_subvol, hot_subvol->fops->unlink, &local->loc,
- local->flags, NULL);
- return 0;
- }
-
- LOCK(&frame->lock);
- {
- if (op_errno == ENOENT) {
- local->op_ret = 0;
- local->op_errno = op_errno;
- } else {
- local->op_ret = op_ret;
- local->op_errno = op_errno;
- }
- gf_msg_debug(this->name, op_errno, "Lookup : subvolume %s returned -1",
- prev->name);
- }
-
- UNLOCK(&frame->lock);
-
- DHT_STACK_UNWIND(unlink, frame, local->op_ret, local->op_errno,
- &local->preparent, &local->postparent, xdata);
-
- return 0;
-}
-
-int
-tier_unlink_linkfile_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
-{
- dht_local_t *local = NULL;
- xlator_t *prev = NULL;
-
- local = frame->local;
- prev = cookie;
-
- LOCK(&frame->lock);
- {
- /* Ignore EINVAL for tier to ignore error when the file
- does not exist on the other tier */
- if ((op_ret == -1) && !((op_errno == ENOENT) || (op_errno == EINVAL))) {
- local->op_errno = op_errno;
- local->op_ret = op_ret;
- gf_msg_debug(this->name, op_errno,
- "Unlink link: subvolume %s"
- " returned -1",
- prev->name);
- goto unlock;
- }
-
- local->op_ret = 0;
- }
-unlock:
- UNLOCK(&frame->lock);
-
- if (local->op_ret == -1)
- goto err;
-
- DHT_STACK_UNWIND(unlink, frame, local->op_ret, local->op_errno,
- &local->preparent, &local->postparent, xdata);
-
- return 0;
-
-err:
- DHT_STACK_UNWIND(unlink, frame, -1, local->op_errno, NULL, NULL, NULL);
- return 0;
-}
-
-int32_t
-tier_unlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
- int op_errno, struct iatt *preparent, struct iatt *postparent,
- dict_t *xdata)
-{
- dht_local_t *local = NULL;
- xlator_t *prev = NULL;
- struct iatt *stbuf = NULL;
- dht_conf_t *conf = NULL;
- int ret = -1;
- xlator_t *hot_tier = NULL;
- xlator_t *cold_tier = NULL;
-
- local = frame->local;
- prev = cookie;
- conf = this->private;
-
- cold_tier = TIER_HASHED_SUBVOL;
- hot_tier = TIER_UNHASHED_SUBVOL;
-
- LOCK(&frame->lock);
- {
- if (op_ret == -1) {
- if (op_errno == ENOENT) {
- local->op_ret = 0;
- } else {
- local->op_ret = -1;
- local->op_errno = op_errno;
- }
- gf_msg_debug(this->name, op_errno,
- "Unlink: subvolume %s returned -1"
- " with errno = %d",
- prev->name, op_errno);
- goto unlock;
- }
-
- local->op_ret = 0;
-
- local->postparent = *postparent;
- local->preparent = *preparent;
-
- if (local->loc.parent) {
- dht_inode_ctx_time_update(local->loc.parent, this,
- &local->preparent, 0);
- dht_inode_ctx_time_update(local->loc.parent, this,
- &local->postparent, 1);
- }
- }
-unlock:
- UNLOCK(&frame->lock);
-
- if (local->op_ret)
- goto out;
-
- if (cold_tier != local->cached_subvol) {
- /*
- * File is present in hot tier, so there will be
- * a link file on cold tier, deleting the linkfile
- * from cold tier
- */
- STACK_WIND_COOKIE(frame, tier_unlink_linkfile_cbk, cold_tier, cold_tier,
- cold_tier->fops->unlink, &local->loc, local->flags,
- xdata);
- return 0;
- }
-
- ret = dict_get_bin(xdata, DHT_IATT_IN_XDATA_KEY, (void **)&stbuf);
- if (!ret && stbuf &&
- ((IS_DHT_MIGRATION_PHASE2(stbuf)) || IS_DHT_MIGRATION_PHASE1(stbuf))) {
- /*
- * File is migrating from cold to hot tier.
- * Delete the destination linkfile.
- */
- STACK_WIND_COOKIE(frame, tier_unlink_lookup_cbk, hot_tier, hot_tier,
- hot_tier->fops->lookup, &local->loc, NULL);
- return 0;
- }
-
-out:
- DHT_STACK_UNWIND(unlink, frame, local->op_ret, local->op_errno,
- &local->preparent, &local->postparent, xdata);
-
- return 0;
-}
-
-int
-tier_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag,
- dict_t *xdata)
-{
- xlator_t *cached_subvol = NULL;
- xlator_t *hashed_subvol = NULL;
- dht_conf_t *conf = NULL;
- int op_errno = -1;
- dht_local_t *local = NULL;
- int ret = -1;
-
- VALIDATE_OR_GOTO(frame, err);
- VALIDATE_OR_GOTO(this, err);
- VALIDATE_OR_GOTO(loc, err);
-
- conf = this->private;
-
- local = dht_local_init(frame, loc, NULL, GF_FOP_UNLINK);
- if (!local) {
- op_errno = ENOMEM;
-
- goto err;
- }
-
- hashed_subvol = TIER_HASHED_SUBVOL;
-
- cached_subvol = local->cached_subvol;
- if (!cached_subvol) {
- gf_msg_debug(this->name, 0, "no cached subvolume for path=%s",
- loc->path);
- op_errno = EINVAL;
- goto err;
- }
-
- local->flags = xflag;
- if (IA_ISREG(loc->inode->ia_type) && (hashed_subvol == cached_subvol)) {
- /*
- * File resides in cold tier. We need to stat
- * the file to see if it is being promoted.
- * If yes we need to delete the destination
- * file as well.
- *
- * Currently we are doing this check only for
- * regular files.
- */
- xdata = xdata ? dict_ref(xdata) : dict_new();
- if (xdata) {
- ret = dict_set_int8(xdata, DHT_IATT_IN_XDATA_KEY, 1);
- if (ret) {
- gf_msg_debug(this->name, 0, "Failed to set dictionary key %s",
- DHT_IATT_IN_XDATA_KEY);
- }
- }
- }
-
- /*
- * File is on hot tier, delete the data file first, then
- * linkfile from cold.
- */
- STACK_WIND_COOKIE(frame, tier_unlink_cbk, cached_subvol, cached_subvol,
- cached_subvol->fops->unlink, loc, xflag, xdata);
- if (xdata)
- dict_unref(xdata);
- return 0;
-err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND(unlink, frame, -1, op_errno, NULL, NULL, NULL);
-
- return 0;
-}
-
-int
-tier_readdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
- int op_errno, gf_dirent_t *orig_entries, dict_t *xdata)
-{
- gf_dirent_t entries;
- gf_dirent_t *orig_entry = NULL;
- gf_dirent_t *entry = NULL;
- int count = 0;
-
- INIT_LIST_HEAD(&entries.list);
-
- if (op_ret < 0)
- goto unwind;
-
- list_for_each_entry(orig_entry, (&orig_entries->list), list)
- {
- entry = gf_dirent_for_name(orig_entry->d_name);
- if (!entry) {
- gf_msg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_NO_MEMORY,
- "Memory allocation failed ");
- goto unwind;
- }
-
- entry->d_off = orig_entry->d_off;
- entry->d_ino = orig_entry->d_ino;
- entry->d_type = orig_entry->d_type;
- entry->d_len = orig_entry->d_len;
-
- list_add_tail(&entry->list, &entries.list);
- count++;
- }
- op_ret = count;
-
-unwind:
- if (op_ret < 0)
- op_ret = 0;
-
- DHT_STACK_UNWIND(readdir, frame, op_ret, op_errno, &entries, NULL);
-
- gf_dirent_free(&entries);
-
- return 0;
-}
-
-int
-tier_readdirp_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
- int op_errno, gf_dirent_t *orig_entries, dict_t *xdata)
-{
- dht_local_t *local = NULL;
- gf_dirent_t entries;
- gf_dirent_t *orig_entry = NULL;
- gf_dirent_t *entry = NULL;
- xlator_t *prev = NULL;
- xlator_t *next_subvol = NULL;
- off_t next_offset = 0;
- int count = 0;
- dht_conf_t *conf = NULL;
- int ret = 0;
- inode_table_t *itable = NULL;
- inode_t *inode = NULL;
-
- INIT_LIST_HEAD(&entries.list);
- prev = cookie;
- local = frame->local;
- itable = local->fd ? local->fd->inode->table : NULL;
-
- conf = this->private;
- GF_VALIDATE_OR_GOTO(this->name, conf, unwind);
-
- if (op_ret < 0)
- goto done;
-
- list_for_each_entry(orig_entry, (&orig_entries->list), list)
- {
- next_offset = orig_entry->d_off;
-
- if (IA_ISINVAL(orig_entry->d_stat.ia_type)) {
- /*stat failed somewhere- ignore this entry*/
- continue;
- }
-
- entry = gf_dirent_for_name(orig_entry->d_name);
- if (!entry) {
- goto unwind;
- }
-
- entry->d_off = orig_entry->d_off;
- entry->d_stat = orig_entry->d_stat;
- entry->d_ino = orig_entry->d_ino;
- entry->d_type = orig_entry->d_type;
- entry->d_len = orig_entry->d_len;
-
- if (orig_entry->dict)
- entry->dict = dict_ref(orig_entry->dict);
-
- if (check_is_linkfile(NULL, (&orig_entry->d_stat), orig_entry->dict,
- conf->link_xattr_name)) {
- goto entries;
-
- } else if (IA_ISDIR(entry->d_stat.ia_type)) {
- if (orig_entry->inode) {
- dht_inode_ctx_time_update(orig_entry->inode, this,
- &entry->d_stat, 1);
- }
- } else {
- if (orig_entry->inode) {
- ret = dht_layout_preset(this, prev, orig_entry->inode);
- if (ret)
- gf_msg(this->name, GF_LOG_WARNING, 0,
- DHT_MSG_LAYOUT_SET_FAILED,
- "failed to link the layout "
- "in inode");
-
- entry->inode = inode_ref(orig_entry->inode);
- } else if (itable) {
- /*
- * orig_entry->inode might be null if any upper
- * layer xlators below client set to null, to
- * force a lookup on the inode even if the inode
- * is present in the inode table. In that case
- * we just update the ctx to make sure we didn't
- * missed anything.
- */
- inode = inode_find(itable, orig_entry->d_stat.ia_gfid);
- if (inode) {
- ret = dht_layout_preset(this, TIER_HASHED_SUBVOL, inode);
- if (ret)
- gf_msg(this->name, GF_LOG_WARNING, 0,
- DHT_MSG_LAYOUT_SET_FAILED,
- "failed to link the layout"
- " in inode");
- inode_unref(inode);
- inode = NULL;
- }
- }
- }
-
- entries:
- list_add_tail(&entry->list, &entries.list);
- count++;
- }
- op_ret = count;
-
-done:
- if (count == 0) {
- /* non-zero next_offset means that
- EOF is not yet hit on the current subvol
- */
- if (next_offset != 0) {
- next_subvol = prev;
- } else {
- goto unwind;
- }
-
- STACK_WIND_COOKIE(frame, tier_readdirp_cbk, next_subvol, next_subvol,
- next_subvol->fops->readdirp, local->fd, local->size,
- next_offset, local->xattr);
- return 0;
- }
-
-unwind:
- if (op_ret < 0)
- op_ret = 0;
-
- DHT_STACK_UNWIND(readdirp, frame, op_ret, op_errno, &entries, NULL);
-
- gf_dirent_free(&entries);
-
- return 0;
-}
-
-int
-tier_do_readdir(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
- off_t yoff, int whichop, dict_t *dict)
-{
- dht_local_t *local = NULL;
- int op_errno = -1;
- xlator_t *hashed_subvol = NULL;
- int ret = 0;
- dht_conf_t *conf = NULL;
-
- VALIDATE_OR_GOTO(frame, err);
- VALIDATE_OR_GOTO(this, err);
- VALIDATE_OR_GOTO(fd, err);
- VALIDATE_OR_GOTO(this->private, err);
-
- conf = this->private;
-
- local = dht_local_init(frame, NULL, NULL, whichop);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
-
- local->fd = fd_ref(fd);
- local->size = size;
- local->xattr_req = (dict) ? dict_ref(dict) : NULL;
-
- hashed_subvol = TIER_HASHED_SUBVOL;
-
- /* TODO: do proper readdir */
- if (whichop == GF_FOP_READDIRP) {
- if (dict)
- local->xattr = dict_ref(dict);
- else
- local->xattr = dict_new();
-
- if (local->xattr) {
- ret = dict_set_uint32(local->xattr, conf->link_xattr_name, 256);
- if (ret)
- gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DICT_SET_FAILED,
- "Failed to set dictionary value"
- " : key = %s",
- conf->link_xattr_name);
- }
-
- STACK_WIND_COOKIE(frame, tier_readdirp_cbk, hashed_subvol,
- hashed_subvol, hashed_subvol->fops->readdirp, fd,
- size, yoff, local->xattr);
-
- } else {
- STACK_WIND_COOKIE(frame, tier_readdir_cbk, hashed_subvol, hashed_subvol,
- hashed_subvol->fops->readdir, fd, size, yoff,
- local->xattr);
- }
-
- return 0;
-
-err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND(readdir, frame, -1, op_errno, NULL, NULL);
-
- return 0;
-}
-
-int
-tier_readdir(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
- off_t yoff, dict_t *xdata)
-{
- int op = GF_FOP_READDIR;
- dht_conf_t *conf = NULL;
- int i = 0;
-
- conf = this->private;
- if (!conf)
- goto out;
-
- for (i = 0; i < conf->subvolume_cnt; i++) {
- if (!conf->subvolume_status[i]) {
- op = GF_FOP_READDIRP;
- break;
- }
- }
-
- if (conf->use_readdirp)
- op = GF_FOP_READDIRP;
-
-out:
- tier_do_readdir(frame, this, fd, size, yoff, op, 0);
- return 0;
-}
-
-int
-tier_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
- off_t yoff, dict_t *dict)
-{
- tier_do_readdir(frame, this, fd, size, yoff, GF_FOP_READDIRP, dict);
- return 0;
-}
-
-int
-tier_statfs_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
- int op_errno, struct statvfs *statvfs, dict_t *xdata)
-{
- gf_boolean_t event = _gf_false;
- qdstatfs_action_t action = qdstatfs_action_OFF;
- dht_local_t *local = NULL;
- int this_call_cnt = 0;
- int bsize = 0;
- int frsize = 0;
- GF_UNUSED int ret = 0;
- unsigned long new_usage = 0;
- unsigned long cur_usage = 0;
- xlator_t *prev = NULL;
- dht_conf_t *conf = NULL;
- tier_statvfs_t *tier_stat = NULL;
-
- prev = cookie;
- local = frame->local;
- GF_ASSERT(local);
-
- conf = this->private;
-
- if (xdata)
- ret = dict_get_int8(xdata, "quota-deem-statfs", (int8_t *)&event);
-
- tier_stat = &local->tier_statvfs;
-
- LOCK(&frame->lock);
- {
- if (op_ret == -1) {
- local->op_errno = op_errno;
- goto unlock;
- }
- if (!statvfs) {
- op_errno = EINVAL;
- local->op_ret = -1;
- goto unlock;
- }
- local->op_ret = 0;
-
- if (local->quota_deem_statfs) {
- if (event == _gf_true) {
- action = qdstatfs_action_COMPARE;
- } else {
- action = qdstatfs_action_NEGLECT;
- }
- } else {
- if (event == _gf_true) {
- action = qdstatfs_action_REPLACE;
- local->quota_deem_statfs = _gf_true;
- }
- }
-
- if (local->quota_deem_statfs) {
- switch (action) {
- case qdstatfs_action_NEGLECT:
- goto unlock;
-
- case qdstatfs_action_REPLACE:
- local->statvfs = *statvfs;
- goto unlock;
-
- case qdstatfs_action_COMPARE:
- new_usage = statvfs->f_blocks - statvfs->f_bfree;
- cur_usage = local->statvfs.f_blocks -
- local->statvfs.f_bfree;
-
- /* Take the max of the usage from subvols */
- if (new_usage >= cur_usage)
- local->statvfs = *statvfs;
- goto unlock;
-
- default:
- break;
- }
- }
-
- if (local->statvfs.f_bsize != 0) {
- bsize = max(local->statvfs.f_bsize, statvfs->f_bsize);
- frsize = max(local->statvfs.f_frsize, statvfs->f_frsize);
- dht_normalize_stats(&local->statvfs, bsize, frsize);
- dht_normalize_stats(statvfs, bsize, frsize);
- } else {
- local->statvfs.f_bsize = statvfs->f_bsize;
- local->statvfs.f_frsize = statvfs->f_frsize;
- }
-
- if (prev == TIER_HASHED_SUBVOL) {
- local->statvfs.f_blocks = statvfs->f_blocks;
- local->statvfs.f_files = statvfs->f_files;
- local->statvfs.f_fsid = statvfs->f_fsid;
- local->statvfs.f_flag = statvfs->f_flag;
- local->statvfs.f_namemax = statvfs->f_namemax;
- tier_stat->blocks_used = (statvfs->f_blocks - statvfs->f_bfree);
- tier_stat->pblocks_used = (statvfs->f_blocks - statvfs->f_bavail);
- tier_stat->files_used = (statvfs->f_files - statvfs->f_ffree);
- tier_stat->pfiles_used = (statvfs->f_files - statvfs->f_favail);
- tier_stat->hashed_fsid = statvfs->f_fsid;
- } else {
- tier_stat->unhashed_fsid = statvfs->f_fsid;
- tier_stat->unhashed_blocks_used = (statvfs->f_blocks -
- statvfs->f_bfree);
- tier_stat->unhashed_pblocks_used = (statvfs->f_blocks -
- statvfs->f_bavail);
- tier_stat->unhashed_files_used = (statvfs->f_files -
- statvfs->f_ffree);
- tier_stat->unhashed_pfiles_used = (statvfs->f_files -
- statvfs->f_favail);
- }
- }
-unlock:
- UNLOCK(&frame->lock);
-
- this_call_cnt = dht_frame_return(frame);
- if (is_last_call(this_call_cnt)) {
- if (tier_stat->unhashed_fsid != tier_stat->hashed_fsid) {
- tier_stat->blocks_used += tier_stat->unhashed_blocks_used;
- tier_stat->pblocks_used += tier_stat->unhashed_pblocks_used;
- tier_stat->files_used += tier_stat->unhashed_files_used;
- tier_stat->pfiles_used += tier_stat->unhashed_pfiles_used;
- }
- local->statvfs.f_bfree = local->statvfs.f_blocks -
- tier_stat->blocks_used;
- local->statvfs.f_bavail = local->statvfs.f_blocks -
- tier_stat->pblocks_used;
- local->statvfs.f_ffree = local->statvfs.f_files - tier_stat->files_used;
- local->statvfs.f_favail = local->statvfs.f_files -
- tier_stat->pfiles_used;
- DHT_STACK_UNWIND(statfs, frame, local->op_ret, local->op_errno,
- &local->statvfs, xdata);
- }
-
- return 0;
-}
-
-int
-tier_statfs(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
-{
- dht_local_t *local = NULL;
- dht_conf_t *conf = NULL;
- int op_errno = -1;
- int i = -1;
- inode_t *inode = NULL;
- inode_table_t *itable = NULL;
- uuid_t root_gfid = {
- 0,
- };
- loc_t newloc = {
- 0,
- };
-
- VALIDATE_OR_GOTO(frame, err);
- VALIDATE_OR_GOTO(this, err);
- VALIDATE_OR_GOTO(loc, err);
- VALIDATE_OR_GOTO(this->private, err);
-
- conf = this->private;
-
- local = dht_local_init(frame, NULL, NULL, GF_FOP_STATFS);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
-
- if (loc->inode && !IA_ISDIR(loc->inode->ia_type)) {
- itable = loc->inode->table;
- if (!itable) {
- op_errno = EINVAL;
- goto err;
- }
-
- loc = &local->loc2;
- root_gfid[15] = 1;
-
- inode = inode_find(itable, root_gfid);
- if (!inode) {
- op_errno = EINVAL;
- goto err;
- }
-
- dht_build_root_loc(inode, &newloc);
- loc = &newloc;
- }
-
- local->call_cnt = conf->subvolume_cnt;
-
- for (i = 0; i < conf->subvolume_cnt; i++) {
- STACK_WIND_COOKIE(frame, tier_statfs_cbk, conf->subvolumes[i],
- conf->subvolumes[i],
- conf->subvolumes[i]->fops->statfs, loc, xdata);
- }
-
- return 0;
-
-err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND(statfs, frame, -1, op_errno, NULL, NULL);
-
- return 0;
-}
diff --git a/xlators/cluster/dht/src/tier-common.h b/xlators/cluster/dht/src/tier-common.h
deleted file mode 100644
index b1ebaa8004d..00000000000
--- a/xlators/cluster/dht/src/tier-common.h
+++ /dev/null
@@ -1,55 +0,0 @@
-/*
- Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-#ifndef _TIER_COMMON_H_
-#define _TIER_COMMON_H_
-/* Function definitions */
-int
-tier_create_unlink_stale_linkto_cbk(call_frame_t *frame, void *cookie,
- xlator_t *this, int op_ret, int op_errno,
- struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata);
-
-int
-tier_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
- int op_errno, fd_t *fd, inode_t *inode, struct iatt *stbuf,
- struct iatt *preparent, struct iatt *postparent, dict_t *xdata);
-
-int
-tier_create_linkfile_create_cbk(call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret,
- int32_t op_errno, inode_t *inode,
- struct iatt *stbuf, struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata);
-
-int
-tier_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
- mode_t mode, mode_t umask, fd_t *fd, dict_t *params);
-
-int32_t
-tier_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag,
- dict_t *xdata);
-
-int32_t
-tier_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
- off_t off, dict_t *dict);
-
-int
-tier_readdir(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
- off_t yoff, dict_t *xdata);
-
-int
-tier_link(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
- dict_t *xdata);
-
-int
-tier_statfs(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata);
-
-#endif
diff --git a/xlators/cluster/dht/src/tier.c b/xlators/cluster/dht/src/tier.c
deleted file mode 100644
index 584f1dd76ba..00000000000
--- a/xlators/cluster/dht/src/tier.c
+++ /dev/null
@@ -1,3090 +0,0 @@
-/*
- Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-#include <dlfcn.h>
-
-#include "dht-common.h"
-#include "tier.h"
-#include "tier-common.h"
-#include "syscall.h"
-#include "events.h"
-#include "tier-ctr-interface.h"
-
-/*Hard coded DB info*/
-static gfdb_db_type_t dht_tier_db_type = GFDB_SQLITE3;
-/*Hard coded DB info*/
-
-/*Mutex for updating the data movement stats*/
-static pthread_mutex_t dm_stat_mutex = PTHREAD_MUTEX_INITIALIZER;
-
-/* Stores the path location of promotion query files */
-static char *promotion_qfile;
-/* Stores the path location of demotion query files */
-static char *demotion_qfile;
-
-static void *libhandle;
-static gfdb_methods_t gfdb_methods;
-
-#define DB_QUERY_RECORD_SIZE 4096
-
-/*
- * Closes all the fds and frees the qfile_array
- * */
-static void
-qfile_array_free(tier_qfile_array_t *qfile_array)
-{
- ssize_t i = 0;
-
- if (qfile_array) {
- if (qfile_array->fd_array) {
- for (i = 0; i < qfile_array->array_size; i++) {
- if (qfile_array->fd_array[i] != -1) {
- sys_close(qfile_array->fd_array[i]);
- }
- }
- }
- GF_FREE(qfile_array->fd_array);
- }
- GF_FREE(qfile_array);
-}
-
-/* Create a new query file list with given size */
-static tier_qfile_array_t *
-qfile_array_new(ssize_t array_size)
-{
- int ret = -1;
- tier_qfile_array_t *qfile_array = NULL;
- ssize_t i = 0;
-
- GF_VALIDATE_OR_GOTO("tier", (array_size > 0), out);
-
- qfile_array = GF_CALLOC(1, sizeof(tier_qfile_array_t),
- gf_tier_mt_qfile_array_t);
- if (!qfile_array) {
- gf_msg("tier", GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "Failed to allocate memory for tier_qfile_array_t");
- goto out;
- }
-
- qfile_array->fd_array = GF_MALLOC(array_size * sizeof(int),
- gf_dht_mt_int32_t);
- if (!qfile_array->fd_array) {
- gf_msg("tier", GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "Failed to allocate memory for "
- "tier_qfile_array_t->fd_array");
- goto out;
- }
-
- /* Init all the fds to -1 */
- for (i = 0; i < array_size; i++) {
- qfile_array->fd_array[i] = -1;
- }
-
- qfile_array->array_size = array_size;
- qfile_array->next_index = 0;
-
- /* Set exhausted count to list size as the list is empty */
- qfile_array->exhausted_count = qfile_array->array_size;
-
- ret = 0;
-out:
- if (ret) {
- qfile_array_free(qfile_array);
- qfile_array = NULL;
- }
- return qfile_array;
-}
-
-/* Checks if the query file list is empty or totally exhausted. */
-static gf_boolean_t
-is_qfile_array_empty(tier_qfile_array_t *qfile_array)
-{
- return (qfile_array->exhausted_count == qfile_array->array_size)
- ? _gf_true
- : _gf_false;
-}
-
-/* Shifts the next_fd pointer to the next available fd in the list */
-static void
-shift_next_index(tier_qfile_array_t *qfile_array)
-{
- int qfile_fd = 0;
- int spin_count = 0;
-
- if (is_qfile_array_empty(qfile_array)) {
- return;
- }
-
- do {
- /* change next_index in a rotional manner */
- (qfile_array->next_index == (qfile_array->array_size - 1))
- ? qfile_array->next_index = 0
- : qfile_array->next_index++;
-
- qfile_fd = (qfile_array->fd_array[qfile_array->next_index]);
-
- spin_count++;
-
- } while ((qfile_fd == -1) && (spin_count < qfile_array->array_size));
-}
-
-/*
- * This is a non-thread safe function to read query records
- * from a list of query files in a Round-Robin manner.
- * As in when the query files get exhuasted they are closed.
- * Returns:
- * 0 if all the query records in all the query files of the list are
- * exhausted.
- * > 0 if a query record is successfully read. Indicates the size of the query
- * record read.
- * < 0 if there was failure
- * */
-static int
-read_query_record_list(tier_qfile_array_t *qfile_array,
- gfdb_query_record_t **query_record)
-{
- int ret = -1;
- int qfile_fd = 0;
-
- GF_VALIDATE_OR_GOTO("tier", qfile_array, out);
- GF_VALIDATE_OR_GOTO("tier", qfile_array->fd_array, out);
-
- do {
- if (is_qfile_array_empty(qfile_array)) {
- ret = 0;
- break;
- }
-
- qfile_fd = qfile_array->fd_array[qfile_array->next_index];
- ret = gfdb_methods.gfdb_read_query_record(qfile_fd, query_record);
- if (ret <= 0) {
- /*The qfile_fd has reached EOF or
- * there was an error.
- * 1. Close the exhausted fd
- * 2. increment the exhausted count
- * 3. shift next_qfile to next qfile
- **/
- sys_close(qfile_fd);
- qfile_array->fd_array[qfile_array->next_index] = -1;
- qfile_array->exhausted_count++;
- /* shift next_qfile to next qfile */
- shift_next_index(qfile_array);
- continue;
- } else {
- /* shift next_qfile to next qfile */
- shift_next_index(qfile_array);
- break;
- }
- } while (1);
-out:
- return ret;
-}
-
-/* Check and update the watermark every WM_INTERVAL seconds */
-#define WM_INTERVAL 5
-#define WM_INTERVAL_EMERG 1
-
-static int
-tier_check_same_node(xlator_t *this, loc_t *loc, gf_defrag_info_t *defrag)
-{
- int ret = -1;
- dict_t *dict = NULL;
- char *uuid_str = NULL;
- uuid_t node_uuid = {
- 0,
- };
-
- GF_VALIDATE_OR_GOTO("tier", this, out);
- GF_VALIDATE_OR_GOTO(this->name, loc, out);
- GF_VALIDATE_OR_GOTO(this->name, defrag, out);
-
- if (syncop_getxattr(this, loc, &dict, GF_XATTR_NODE_UUID_KEY, NULL, NULL)) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "Unable to get NODE_UUID_KEY %s %s\n", loc->name, loc->path);
- goto out;
- }
-
- if (dict_get_str(dict, GF_XATTR_NODE_UUID_KEY, &uuid_str) < 0) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "Failed to get node-uuids for %s", loc->path);
- goto out;
- }
-
- if (gf_uuid_parse(uuid_str, node_uuid)) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "uuid_parse failed for %s", loc->path);
- goto out;
- }
-
- if (gf_uuid_compare(node_uuid, defrag->node_uuid)) {
- gf_msg_debug(this->name, 0, "%s does not belong to this node",
- loc->path);
- ret = 1;
- goto out;
- }
-
- ret = 0;
-out:
- if (dict)
- dict_unref(dict);
-
- return ret;
-}
-
-int
-tier_get_fs_stat(xlator_t *this, loc_t *root_loc)
-{
- int ret = 0;
- gf_defrag_info_t *defrag = NULL;
- dht_conf_t *conf = NULL;
- dict_t *xdata = NULL;
- struct statvfs statfs = {
- 0,
- };
- gf_tier_conf_t *tier_conf = NULL;
-
- conf = this->private;
- if (!conf) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_STATUS,
- "conf is NULL");
- ret = -1;
- goto exit;
- }
-
- defrag = conf->defrag;
- if (!defrag) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_STATUS,
- "defrag is NULL");
- ret = -1;
- goto exit;
- }
-
- tier_conf = &defrag->tier_conf;
-
- xdata = dict_new();
- if (!xdata) {
- gf_msg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_NO_MEMORY,
- "failed to allocate dictionary");
- ret = -1;
- goto exit;
- }
-
- ret = dict_set_int8(xdata, GF_INTERNAL_IGNORE_DEEM_STATFS, 1);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED,
- "Failed to set " GF_INTERNAL_IGNORE_DEEM_STATFS " in dict");
- ret = -1;
- goto exit;
- }
-
- /* Find how much free space is on the hot subvolume.
- * Then see if that value */
- /* is less than or greater than user defined watermarks.
- * Stash results in */
- /* the tier_conf data structure. */
-
- ret = syncop_statfs(conf->subvolumes[1], root_loc, &statfs, xdata, NULL);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_LOG_TIER_STATUS,
- "Unable to obtain statfs.");
- goto exit;
- }
-
- pthread_mutex_lock(&dm_stat_mutex);
-
- tier_conf->block_size = statfs.f_bsize;
- tier_conf->blocks_total = statfs.f_blocks;
- tier_conf->blocks_used = statfs.f_blocks - statfs.f_bfree;
-
- tier_conf->percent_full = GF_PERCENTAGE(tier_conf->blocks_used,
- statfs.f_blocks);
- pthread_mutex_unlock(&dm_stat_mutex);
-
-exit:
- if (xdata)
- dict_unref(xdata);
- return ret;
-}
-
-static void
-tier_send_watermark_event(const char *volname, tier_watermark_op_t old_wm,
- tier_watermark_op_t new_wm)
-{
- if (old_wm == TIER_WM_LOW || old_wm == TIER_WM_NONE) {
- if (new_wm == TIER_WM_MID) {
- gf_event(EVENT_TIER_WATERMARK_RAISED_TO_MID, "vol=%s", volname);
- } else if (new_wm == TIER_WM_HI) {
- gf_event(EVENT_TIER_WATERMARK_HI, "vol=%s", volname);
- }
- } else if (old_wm == TIER_WM_MID) {
- if (new_wm == TIER_WM_LOW) {
- gf_event(EVENT_TIER_WATERMARK_DROPPED_TO_LOW, "vol=%s", volname);
- } else if (new_wm == TIER_WM_HI) {
- gf_event(EVENT_TIER_WATERMARK_HI, "vol=%s", volname);
- }
- } else if (old_wm == TIER_WM_HI) {
- if (new_wm == TIER_WM_MID) {
- gf_event(EVENT_TIER_WATERMARK_DROPPED_TO_MID, "vol=%s", volname);
- } else if (new_wm == TIER_WM_LOW) {
- gf_event(EVENT_TIER_WATERMARK_DROPPED_TO_LOW, "vol=%s", volname);
- }
- }
-}
-
-int
-tier_check_watermark(xlator_t *this)
-{
- int ret = -1;
- gf_defrag_info_t *defrag = NULL;
- dht_conf_t *conf = NULL;
- gf_tier_conf_t *tier_conf = NULL;
- tier_watermark_op_t wm = TIER_WM_NONE;
-
- conf = this->private;
- if (!conf)
- goto exit;
-
- defrag = conf->defrag;
- if (!defrag)
- goto exit;
-
- tier_conf = &defrag->tier_conf;
-
- if (tier_conf->percent_full < tier_conf->watermark_low) {
- wm = TIER_WM_LOW;
-
- } else if (tier_conf->percent_full < tier_conf->watermark_hi) {
- wm = TIER_WM_MID;
-
- } else {
- wm = TIER_WM_HI;
- }
-
- if (wm != tier_conf->watermark_last) {
- tier_send_watermark_event(tier_conf->volname, tier_conf->watermark_last,
- wm);
-
- tier_conf->watermark_last = wm;
- gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS,
- "Tier watermark now %d", wm);
- }
-
- ret = 0;
-
-exit:
- return ret;
-}
-
-static gf_boolean_t
-is_hot_tier_full(gf_tier_conf_t *tier_conf)
-{
- if (tier_conf && (tier_conf->mode == TIER_MODE_WM) &&
- (tier_conf->watermark_last == TIER_WM_HI))
- return _gf_true;
-
- return _gf_false;
-}
-
-int
-tier_do_migration(xlator_t *this, int promote)
-{
- gf_defrag_info_t *defrag = NULL;
- dht_conf_t *conf = NULL;
- long rand = 0;
- int migrate = 0;
- gf_tier_conf_t *tier_conf = NULL;
-
- conf = this->private;
- if (!conf)
- goto exit;
-
- defrag = conf->defrag;
- if (!defrag)
- goto exit;
-
- if (tier_check_watermark(this) != 0) {
- gf_msg(this->name, GF_LOG_CRITICAL, errno, DHT_MSG_LOG_TIER_ERROR,
- "Failed to get watermark");
- goto exit;
- }
-
- tier_conf = &defrag->tier_conf;
-
- switch (tier_conf->watermark_last) {
- case TIER_WM_LOW:
- migrate = promote ? 1 : 0;
- break;
- case TIER_WM_HI:
- migrate = promote ? 0 : 1;
- break;
- case TIER_WM_MID:
- /* coverity[DC.WEAK_CRYPTO] */
- rand = random() % 100;
- if (promote) {
- migrate = (rand > tier_conf->percent_full);
- } else {
- migrate = (rand <= tier_conf->percent_full);
- }
- break;
- }
-
-exit:
- return migrate;
-}
-
-int
-tier_migrate(xlator_t *this, int is_promotion, dict_t *migrate_data, loc_t *loc,
- gf_tier_conf_t *tier_conf)
-{
- int ret = -1;
-
- pthread_mutex_lock(&tier_conf->pause_mutex);
- if (is_promotion)
- tier_conf->promote_in_progress = 1;
- else
- tier_conf->demote_in_progress = 1;
- pthread_mutex_unlock(&tier_conf->pause_mutex);
-
- /* Data migration */
- ret = syncop_setxattr(this, loc, migrate_data, 0, NULL, NULL);
-
- pthread_mutex_lock(&tier_conf->pause_mutex);
- if (is_promotion)
- tier_conf->promote_in_progress = 0;
- else
- tier_conf->demote_in_progress = 0;
- pthread_mutex_unlock(&tier_conf->pause_mutex);
-
- return ret;
-}
-
-/* returns _gf_true: if file can be promoted
- * returns _gf_false: if file cannot be promoted
- */
-static gf_boolean_t
-tier_can_promote_file(xlator_t *this, char const *file_name,
- struct iatt *current, gf_defrag_info_t *defrag)
-{
- gf_boolean_t ret = _gf_false;
- fsblkcnt_t estimated_usage = 0;
-
- if (defrag->tier_conf.tier_max_promote_size &&
- (current->ia_size > defrag->tier_conf.tier_max_promote_size)) {
- gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS,
- "File %s (gfid:%s) with size (%" PRIu64
- ") exceeds maxsize "
- "(%d) for promotion. File will not be promoted.",
- file_name, uuid_utoa(current->ia_gfid), current->ia_size,
- defrag->tier_conf.tier_max_promote_size);
- goto err;
- }
-
- /* bypass further validations for TEST mode */
- if (defrag->tier_conf.mode != TIER_MODE_WM) {
- ret = _gf_true;
- goto err;
- }
-
- /* convert the file size to blocks as per the block size of the
- * destination tier
- * NOTE: add (block_size - 1) to get the correct block size when
- * there is a remainder after a modulo
- */
- estimated_usage = ((current->ia_size + defrag->tier_conf.block_size - 1) /
- defrag->tier_conf.block_size) +
- defrag->tier_conf.blocks_used;
-
- /* test if the estimated block usage goes above HI watermark */
- if (GF_PERCENTAGE(estimated_usage, defrag->tier_conf.blocks_total) >=
- defrag->tier_conf.watermark_hi) {
- gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS,
- "Estimated block count consumption on "
- "hot tier (%" PRIu64
- ") exceeds hi watermark (%d%%). "
- "File will not be promoted.",
- estimated_usage, defrag->tier_conf.watermark_hi);
- goto err;
- }
- ret = _gf_true;
-err:
- return ret;
-}
-
-static int
-tier_set_migrate_data(dict_t *migrate_data)
-{
- int failed = 1;
-
- failed = dict_set_str(migrate_data, GF_XATTR_FILE_MIGRATE_KEY, "force");
- if (failed) {
- goto bail_out;
- }
-
- /* Flag to suggest the xattr call is from migrator */
- failed = dict_set_str(migrate_data, "from.migrator", "yes");
- if (failed) {
- goto bail_out;
- }
-
- /* Flag to suggest its a tiering migration
- * The reason for this dic key-value is that
- * promotions and demotions are multithreaded
- * so the original frame from gf_defrag_start()
- * is not carried. A new frame will be created when
- * we do syncop_setxattr(). This does not have the
- * frame->root->pid of the original frame. So we pass
- * this dic key-value when we do syncop_setxattr() to do
- * data migration and set the frame->root->pid to
- * GF_CLIENT_PID_TIER_DEFRAG in dht_setxattr() just before
- * calling dht_start_rebalance_task() */
- failed = dict_set_str(migrate_data, TIERING_MIGRATION_KEY, "yes");
- if (failed) {
- goto bail_out;
- }
-
- failed = 0;
-
-bail_out:
- return failed;
-}
-
-static char *
-tier_get_parent_path(xlator_t *this, loc_t *p_loc, struct iatt *par_stbuf,
- int *per_link_status)
-{
- int ret = -1;
- char *parent_path = NULL;
- dict_t *xdata_request = NULL;
- dict_t *xdata_response = NULL;
-
- xdata_request = dict_new();
- if (!xdata_request) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "Failed to create xdata_request dict");
- goto err;
- }
- ret = dict_set_int32(xdata_request, GET_ANCESTRY_PATH_KEY, 42);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "Failed to set value to dict : key %s \n",
- GET_ANCESTRY_PATH_KEY);
- goto err;
- }
-
- ret = syncop_lookup(this, p_loc, par_stbuf, NULL, xdata_request,
- &xdata_response);
- /* When the parent gfid is a stale entry, the lookup
- * will fail and stop the demotion process.
- * The parent gfid can be stale when a huge folder is
- * deleted while the files within it are being migrated
- */
- if (ret == -ESTALE) {
- gf_msg(this->name, GF_LOG_WARNING, -ret, DHT_MSG_STALE_LOOKUP,
- "Stale entry in parent lookup for %s", uuid_utoa(p_loc->gfid));
- *per_link_status = 1;
- goto err;
- } else if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_LOG_TIER_ERROR,
- "Error in parent lookup for %s", uuid_utoa(p_loc->gfid));
- *per_link_status = -1;
- goto err;
- }
- ret = dict_get_str(xdata_response, GET_ANCESTRY_PATH_KEY, &parent_path);
- if (ret || !parent_path) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "Failed to get parent path for %s", uuid_utoa(p_loc->gfid));
- *per_link_status = -1;
- goto err;
- }
-
-err:
- if (xdata_request) {
- dict_unref(xdata_request);
- }
-
- if (xdata_response) {
- dict_unref(xdata_response);
- xdata_response = NULL;
- }
-
- return parent_path;
-}
-
-static int
-tier_get_file_name_and_path(xlator_t *this, uuid_t gfid,
- gfdb_link_info_t *link_info,
- char const *parent_path, loc_t *loc,
- int *per_link_status)
-{
- int ret = -1;
-
- loc->name = gf_strdup(link_info->file_name);
- if (!loc->name) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "Memory "
- "allocation failed for %s",
- uuid_utoa(gfid));
- *per_link_status = -1;
- goto err;
- }
- ret = gf_asprintf((char **)&(loc->path), "%s/%s", parent_path, loc->name);
- if (ret < 0) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "Failed to "
- "construct file path for %s %s\n",
- parent_path, loc->name);
- *per_link_status = -1;
- goto err;
- }
-
- ret = 0;
-
-err:
- return ret;
-}
-
-static int
-tier_lookup_file(xlator_t *this, loc_t *p_loc, loc_t *loc, struct iatt *current,
- int *per_link_status)
-{
- int ret = -1;
-
- ret = syncop_lookup(this, loc, current, NULL, NULL, NULL);
-
- /* The file may be deleted even when the parent
- * is available and the lookup will
- * return a stale entry which would stop the
- * migration. so if its a stale entry, then skip
- * the file and keep migrating.
- */
- if (ret == -ESTALE) {
- gf_msg(this->name, GF_LOG_WARNING, -ret, DHT_MSG_STALE_LOOKUP,
- "Stale lookup for %s", uuid_utoa(p_loc->gfid));
- *per_link_status = 1;
- goto err;
- } else if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_LOG_TIER_ERROR,
- "Failed to "
- "lookup file %s\n",
- loc->name);
- *per_link_status = -1;
- goto err;
- }
- ret = 0;
-
-err:
- return ret;
-}
-
-static gf_boolean_t
-tier_is_file_already_at_destination(xlator_t *src_subvol,
- query_cbk_args_t *query_cbk_args,
- dht_conf_t *conf, int *per_link_status)
-{
- gf_boolean_t at_destination = _gf_true;
-
- if (src_subvol == NULL) {
- *per_link_status = 1;
- goto err;
- }
- if (query_cbk_args->is_promotion && src_subvol == conf->subvolumes[1]) {
- *per_link_status = 1;
- goto err;
- }
-
- if (!query_cbk_args->is_promotion && src_subvol == conf->subvolumes[0]) {
- *per_link_status = 1;
- goto err;
- }
- at_destination = _gf_false;
-
-err:
- return at_destination;
-}
-
-static void
-tier_update_migration_counters(query_cbk_args_t *query_cbk_args,
- gf_defrag_info_t *defrag,
- uint64_t *total_migrated_bytes, int *total_files)
-{
- if (query_cbk_args->is_promotion) {
- defrag->total_files_promoted++;
- *total_migrated_bytes += defrag->tier_conf.st_last_promoted_size;
- pthread_mutex_lock(&dm_stat_mutex);
- defrag->tier_conf.blocks_used += defrag->tier_conf
- .st_last_promoted_size;
- pthread_mutex_unlock(&dm_stat_mutex);
- } else {
- defrag->total_files_demoted++;
- *total_migrated_bytes += defrag->tier_conf.st_last_demoted_size;
- pthread_mutex_lock(&dm_stat_mutex);
- defrag->tier_conf.blocks_used -= defrag->tier_conf.st_last_demoted_size;
- pthread_mutex_unlock(&dm_stat_mutex);
- }
- if (defrag->tier_conf.blocks_total) {
- pthread_mutex_lock(&dm_stat_mutex);
- defrag->tier_conf.percent_full = GF_PERCENTAGE(
- defrag->tier_conf.blocks_used, defrag->tier_conf.blocks_total);
- pthread_mutex_unlock(&dm_stat_mutex);
- }
-
- (*total_files)++;
-}
-
-static int
-tier_migrate_link(xlator_t *this, dht_conf_t *conf, uuid_t gfid,
- gfdb_link_info_t *link_info, gf_defrag_info_t *defrag,
- query_cbk_args_t *query_cbk_args, dict_t *migrate_data,
- int *per_link_status, int *total_files,
- uint64_t *total_migrated_bytes)
-{
- int ret = -1;
- struct iatt current = {
- 0,
- };
- struct iatt par_stbuf = {
- 0,
- };
- loc_t p_loc = {
- 0,
- };
- loc_t loc = {
- 0,
- };
- xlator_t *src_subvol = NULL;
- inode_t *linked_inode = NULL;
- char *parent_path = NULL;
-
- /* Lookup for parent and get the path of parent */
- gf_uuid_copy(p_loc.gfid, link_info->pargfid);
- p_loc.inode = inode_new(defrag->root_inode->table);
- if (!p_loc.inode) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "Failed to create reference to inode"
- " for %s",
- uuid_utoa(p_loc.gfid));
-
- *per_link_status = -1;
- goto err;
- }
-
- parent_path = tier_get_parent_path(this, &p_loc, &par_stbuf,
- per_link_status);
- if (!parent_path) {
- goto err;
- }
-
- linked_inode = inode_link(p_loc.inode, NULL, NULL, &par_stbuf);
- inode_unref(p_loc.inode);
- p_loc.inode = linked_inode;
-
- /* Preparing File Inode */
- gf_uuid_copy(loc.gfid, gfid);
- loc.inode = inode_new(defrag->root_inode->table);
- gf_uuid_copy(loc.pargfid, link_info->pargfid);
- loc.parent = inode_ref(p_loc.inode);
-
- /* Get filename and Construct file path */
- if (tier_get_file_name_and_path(this, gfid, link_info, parent_path, &loc,
- per_link_status) != 0) {
- goto err;
- }
- gf_uuid_copy(loc.parent->gfid, link_info->pargfid);
-
- /* lookup file inode */
- if (tier_lookup_file(this, &p_loc, &loc, &current, per_link_status) != 0) {
- goto err;
- }
-
- if (query_cbk_args->is_promotion) {
- if (!tier_can_promote_file(this, link_info->file_name, &current,
- defrag)) {
- *per_link_status = 1;
- goto err;
- }
- }
-
- linked_inode = inode_link(loc.inode, NULL, NULL, &current);
- inode_unref(loc.inode);
- loc.inode = linked_inode;
-
- /*
- * Do not promote/demote if file already is where it
- * should be. It means another brick moved the file
- * so is not an error. So we set per_link_status = 1
- * so that we ignore counting this.
- */
- src_subvol = dht_subvol_get_cached(this, loc.inode);
-
- if (tier_is_file_already_at_destination(src_subvol, query_cbk_args, conf,
- per_link_status)) {
- goto err;
- }
-
- gf_msg_debug(this->name, 0, "Tier %s: src_subvol %s file %s",
- (query_cbk_args->is_promotion ? "promote" : "demote"),
- src_subvol->name, loc.path);
-
- ret = tier_check_same_node(this, &loc, defrag);
- if (ret != 0) {
- if (ret < 0) {
- *per_link_status = -1;
- goto err;
- }
- ret = 0;
- /* By setting per_link_status to 1 we are
- * ignoring this status and will not be counting
- * this file for migration */
- *per_link_status = 1;
- goto err;
- }
-
- gf_uuid_copy(loc.gfid, loc.inode->gfid);
-
- if (gf_defrag_get_pause_state(&defrag->tier_conf) != TIER_RUNNING) {
- gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS,
- "Tiering paused. "
- "Exiting tier_migrate_link");
- goto err;
- }
-
- ret = tier_migrate(this, query_cbk_args->is_promotion, migrate_data, &loc,
- &defrag->tier_conf);
-
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_LOG_TIER_ERROR,
- "Failed to "
- "migrate %s ",
- loc.path);
- *per_link_status = -1;
- goto err;
- }
-
- tier_update_migration_counters(query_cbk_args, defrag, total_migrated_bytes,
- total_files);
-
- ret = 0;
-
-err:
- GF_FREE((char *)loc.name);
- loc.name = NULL;
- loc_wipe(&loc);
- loc_wipe(&p_loc);
-
- if ((*total_files >= defrag->tier_conf.max_migrate_files) ||
- (*total_migrated_bytes > defrag->tier_conf.max_migrate_bytes)) {
- gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS,
- "Reached cycle migration limit."
- "migrated bytes %" PRId64 " files %d",
- *total_migrated_bytes, *total_files);
- ret = -1;
- }
-
- return ret;
-}
-
-static int
-tier_migrate_using_query_file(void *_args)
-{
- int ret = -1;
- query_cbk_args_t *query_cbk_args = (query_cbk_args_t *)_args;
- xlator_t *this = NULL;
- gf_defrag_info_t *defrag = NULL;
- gfdb_query_record_t *query_record = NULL;
- gfdb_link_info_t *link_info = NULL;
- dict_t *migrate_data = NULL;
- /*
- * per_file_status and per_link_status
- * 0 : success
- * -1 : failure
- * 1 : ignore the status and don't count for migration
- * */
- int per_file_status = 0;
- int per_link_status = 0;
- int total_status = 0;
- dht_conf_t *conf = NULL;
- uint64_t total_migrated_bytes = 0;
- int total_files = 0;
- loc_t root_loc = {0};
- gfdb_time_t start_time = {0};
- gfdb_time_t current_time = {0};
- int total_time = 0;
- int max_time = 0;
- gf_boolean_t emergency_demote_mode = _gf_false;
-
- GF_VALIDATE_OR_GOTO("tier", query_cbk_args, out);
- GF_VALIDATE_OR_GOTO("tier", query_cbk_args->this, out);
- this = query_cbk_args->this;
- GF_VALIDATE_OR_GOTO(this->name, query_cbk_args->defrag, out);
- GF_VALIDATE_OR_GOTO(this->name, query_cbk_args->qfile_array, out);
- GF_VALIDATE_OR_GOTO(this->name, this->private, out);
-
- conf = this->private;
-
- defrag = query_cbk_args->defrag;
- migrate_data = dict_new();
- if (!migrate_data)
- goto out;
-
- emergency_demote_mode = (!query_cbk_args->is_promotion &&
- is_hot_tier_full(&defrag->tier_conf));
-
- if (tier_set_migrate_data(migrate_data) != 0) {
- goto out;
- }
-
- dht_build_root_loc(defrag->root_inode, &root_loc);
-
- ret = gettimeofday(&start_time, NULL);
- if (query_cbk_args->is_promotion) {
- max_time = defrag->tier_conf.tier_promote_frequency;
- } else {
- max_time = defrag->tier_conf.tier_demote_frequency;
- }
-
- /* Per file */
- while ((ret = read_query_record_list(query_cbk_args->qfile_array,
- &query_record)) != 0) {
- if (ret < 0) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "Failed to fetch query record "
- "from query file");
- goto out;
- }
-
- if (defrag->defrag_status != GF_DEFRAG_STATUS_STARTED) {
- ret = -1;
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "Exiting tier migration as"
- "defrag status is not started");
- goto out;
- }
-
- ret = gettimeofday(&current_time, NULL);
- if (ret < 0) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "Could not get current time.");
- goto out;
- }
-
- total_time = current_time.tv_sec - start_time.tv_sec;
- if (total_time > max_time) {
- gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS,
- "Max cycle time reached. Exiting migration.");
- goto out;
- }
-
- per_file_status = 0;
- per_link_status = 0;
-
- if (gf_defrag_get_pause_state(&defrag->tier_conf) != TIER_RUNNING) {
- gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS,
- "Tiering paused. "
- "Exiting tier_migrate_using_query_file");
- break;
- }
-
- if (defrag->tier_conf.mode == TIER_MODE_WM) {
- ret = tier_get_fs_stat(this, &root_loc);
- if (ret != 0) {
- gfdb_methods.gfdb_query_record_free(query_record);
- query_record = NULL;
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_STATUS,
- "tier_get_fs_stat() FAILED ... "
- "skipping file migrations until next cycle");
- break;
- }
-
- if (!tier_do_migration(this, query_cbk_args->is_promotion)) {
- gfdb_methods.gfdb_query_record_free(query_record);
- query_record = NULL;
-
- /* We have crossed the high watermark. Stop processing
- * files if this is a promotion cycle so demotion gets
- * a chance to start if not already running*/
-
- if (query_cbk_args->is_promotion &&
- is_hot_tier_full(&defrag->tier_conf)) {
- gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS,
- "High watermark crossed during "
- "promotion. Exiting "
- "tier_migrate_using_query_file");
- break;
- }
- continue;
- }
- }
-
- per_link_status = 0;
-
- /* For now we only support single link migration. And we will
- * ignore other hard links in the link info list of query record
- * TODO: Multiple hard links migration */
- if (!list_empty(&query_record->link_list)) {
- link_info = list_first_entry(&query_record->link_list,
- gfdb_link_info_t, list);
- }
- if (link_info != NULL) {
- if (tier_migrate_link(this, conf, query_record->gfid, link_info,
- defrag, query_cbk_args, migrate_data,
- &per_link_status, &total_files,
- &total_migrated_bytes) != 0) {
- gf_msg(
- this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS,
- "%s failed for %s(gfid:%s)",
- (query_cbk_args->is_promotion ? "Promotion" : "Demotion"),
- link_info->file_name, uuid_utoa(query_record->gfid));
- }
- }
- per_file_status = per_link_status;
-
- if (per_file_status < 0) { /* Failure */
- pthread_mutex_lock(&dm_stat_mutex);
- defrag->total_failures++;
- pthread_mutex_unlock(&dm_stat_mutex);
- } else if (per_file_status == 0) { /* Success */
- pthread_mutex_lock(&dm_stat_mutex);
- defrag->total_files++;
- pthread_mutex_unlock(&dm_stat_mutex);
- } else if (per_file_status == 1) { /* Ignore */
- per_file_status = 0;
- /* Since this attempt was ignored we
- * decrement the lookup count*/
- pthread_mutex_lock(&dm_stat_mutex);
- defrag->num_files_lookedup--;
- pthread_mutex_unlock(&dm_stat_mutex);
- }
- total_status = total_status + per_file_status;
- per_link_status = 0;
- per_file_status = 0;
-
- gfdb_methods.gfdb_query_record_free(query_record);
- query_record = NULL;
-
- /* If we are demoting and the entry watermark was HI, then
- * we are done with emergency demotions if the current
- * watermark has fallen below hi-watermark level
- */
- if (emergency_demote_mode) {
- if (tier_check_watermark(this) == 0) {
- if (!is_hot_tier_full(&defrag->tier_conf)) {
- break;
- }
- }
- }
- }
-
-out:
- if (migrate_data)
- dict_unref(migrate_data);
-
- gfdb_methods.gfdb_query_record_free(query_record);
- query_record = NULL;
-
- return total_status;
-}
-
-/* This is the call back function per record/file from data base */
-static int
-tier_gf_query_callback(gfdb_query_record_t *gfdb_query_record, void *_args)
-{
- int ret = -1;
- query_cbk_args_t *query_cbk_args = _args;
-
- GF_VALIDATE_OR_GOTO("tier", query_cbk_args, out);
- GF_VALIDATE_OR_GOTO("tier", query_cbk_args->defrag, out);
- GF_VALIDATE_OR_GOTO("tier", (query_cbk_args->query_fd > 0), out);
-
- ret = gfdb_methods.gfdb_write_query_record(query_cbk_args->query_fd,
- gfdb_query_record);
- if (ret) {
- gf_msg("tier", GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "Failed writing query record to query file");
- goto out;
- }
-
- pthread_mutex_lock(&dm_stat_mutex);
- query_cbk_args->defrag->num_files_lookedup++;
- pthread_mutex_unlock(&dm_stat_mutex);
-
- ret = 0;
-out:
- return ret;
-}
-
-/* Create query file in tier process */
-static int
-tier_process_self_query(tier_brick_list_t *local_brick, void *args)
-{
- int ret = -1;
- char *db_path = NULL;
- query_cbk_args_t *query_cbk_args = NULL;
- xlator_t *this = NULL;
- gfdb_conn_node_t *conn_node = NULL;
- dict_t *params_dict = NULL;
- dict_t *ctr_ipc_dict = NULL;
- gfdb_brick_info_t *gfdb_brick_info = args;
-
- /*Init of all the essentials*/
- GF_VALIDATE_OR_GOTO("tier", gfdb_brick_info, out);
- query_cbk_args = gfdb_brick_info->_query_cbk_args;
-
- GF_VALIDATE_OR_GOTO("tier", query_cbk_args->this, out);
- this = query_cbk_args->this;
-
- GF_VALIDATE_OR_GOTO(this->name, gfdb_brick_info->_query_cbk_args, out);
-
- GF_VALIDATE_OR_GOTO(this->name, local_brick, out);
-
- GF_VALIDATE_OR_GOTO(this->name, local_brick->xlator, out);
-
- GF_VALIDATE_OR_GOTO(this->name, local_brick->brick_db_path, out);
-
- db_path = local_brick->brick_db_path;
-
- /*Preparing DB parameters before init_db i.e getting db connection*/
- params_dict = dict_new();
- if (!params_dict) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "DB Params cannot initialized");
- goto out;
- }
- SET_DB_PARAM_TO_DICT(this->name, params_dict,
- (char *)gfdb_methods.get_db_path_key(), db_path, ret,
- out);
-
- /*Get the db connection*/
- conn_node = gfdb_methods.init_db((void *)params_dict, dht_tier_db_type);
- if (!conn_node) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "FATAL: Failed initializing db operations");
- goto out;
- }
-
- /* Query for eligible files from db */
- query_cbk_args->query_fd = open(local_brick->qfile_path,
- O_WRONLY | O_CREAT | O_APPEND,
- S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH);
- if (query_cbk_args->query_fd < 0) {
- gf_msg(this->name, GF_LOG_ERROR, errno, DHT_MSG_LOG_TIER_ERROR,
- "Failed to open query file %s", local_brick->qfile_path);
- goto out;
- }
- if (!gfdb_brick_info->_gfdb_promote) {
- if (query_cbk_args->defrag->tier_conf.watermark_last == TIER_WM_HI) {
- /* emergency demotion mode */
- ret = gfdb_methods.find_all(
- conn_node, tier_gf_query_callback, (void *)query_cbk_args,
- query_cbk_args->defrag->tier_conf.query_limit);
- } else {
- if (query_cbk_args->defrag->write_freq_threshold == 0 &&
- query_cbk_args->defrag->read_freq_threshold == 0) {
- ret = gfdb_methods.find_unchanged_for_time(
- conn_node, tier_gf_query_callback, (void *)query_cbk_args,
- gfdb_brick_info->time_stamp);
- } else {
- ret = gfdb_methods.find_unchanged_for_time_freq(
- conn_node, tier_gf_query_callback, (void *)query_cbk_args,
- gfdb_brick_info->time_stamp,
- query_cbk_args->defrag->write_freq_threshold,
- query_cbk_args->defrag->read_freq_threshold, _gf_false);
- }
- }
- } else {
- if (query_cbk_args->defrag->write_freq_threshold == 0 &&
- query_cbk_args->defrag->read_freq_threshold == 0) {
- ret = gfdb_methods.find_recently_changed_files(
- conn_node, tier_gf_query_callback, (void *)query_cbk_args,
- gfdb_brick_info->time_stamp);
- } else {
- ret = gfdb_methods.find_recently_changed_files_freq(
- conn_node, tier_gf_query_callback, (void *)query_cbk_args,
- gfdb_brick_info->time_stamp,
- query_cbk_args->defrag->write_freq_threshold,
- query_cbk_args->defrag->read_freq_threshold, _gf_false);
- }
- }
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "FATAL: query from db failed");
- goto out;
- }
-
- /*Clear the heat on the DB entries*/
- /*Preparing ctr_ipc_dict*/
- ctr_ipc_dict = dict_new();
- if (!ctr_ipc_dict) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "ctr_ipc_dict cannot initialized");
- goto out;
- }
-
- SET_DB_PARAM_TO_DICT(this->name, ctr_ipc_dict, GFDB_IPC_CTR_KEY,
- GFDB_IPC_CTR_CLEAR_OPS, ret, out);
-
- ret = syncop_ipc(local_brick->xlator, GF_IPC_TARGET_CTR, ctr_ipc_dict,
- NULL);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "Failed clearing the heat "
- "on db %s error %d",
- local_brick->brick_db_path, ret);
- goto out;
- }
-
- ret = 0;
-out:
- if (params_dict) {
- dict_unref(params_dict);
- params_dict = NULL;
- }
-
- if (ctr_ipc_dict) {
- dict_unref(ctr_ipc_dict);
- ctr_ipc_dict = NULL;
- }
-
- if (query_cbk_args && query_cbk_args->query_fd >= 0) {
- sys_close(query_cbk_args->query_fd);
- query_cbk_args->query_fd = -1;
- }
- gfdb_methods.fini_db(conn_node);
-
- return ret;
-}
-
-/*Ask CTR to create the query file*/
-static int
-tier_process_ctr_query(tier_brick_list_t *local_brick, void *args)
-{
- int ret = -1;
- query_cbk_args_t *query_cbk_args = NULL;
- xlator_t *this = NULL;
- dict_t *ctr_ipc_in_dict = NULL;
- dict_t *ctr_ipc_out_dict = NULL;
- gfdb_brick_info_t *gfdb_brick_info = args;
- gfdb_ipc_ctr_params_t *ipc_ctr_params = NULL;
- int count = 0;
-
- /*Init of all the essentials*/
- GF_VALIDATE_OR_GOTO("tier", gfdb_brick_info, out);
- query_cbk_args = gfdb_brick_info->_query_cbk_args;
-
- GF_VALIDATE_OR_GOTO("tier", query_cbk_args->this, out);
- this = query_cbk_args->this;
-
- GF_VALIDATE_OR_GOTO(this->name, gfdb_brick_info->_query_cbk_args, out);
-
- GF_VALIDATE_OR_GOTO(this->name, local_brick, out);
-
- GF_VALIDATE_OR_GOTO(this->name, local_brick->xlator, out);
-
- GF_VALIDATE_OR_GOTO(this->name, local_brick->brick_db_path, out);
-
- /*Preparing ctr_ipc_in_dict*/
- ctr_ipc_in_dict = dict_new();
- if (!ctr_ipc_in_dict) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "ctr_ipc_in_dict cannot initialized");
- goto out;
- }
-
- ipc_ctr_params = GF_CALLOC(1, sizeof(gfdb_ipc_ctr_params_t),
- gf_tier_mt_ipc_ctr_params_t);
- if (!ipc_ctr_params) {
- goto out;
- }
-
- /* set all the query params*/
- ipc_ctr_params->is_promote = gfdb_brick_info->_gfdb_promote;
-
- ipc_ctr_params->write_freq_threshold = query_cbk_args->defrag
- ->write_freq_threshold;
-
- ipc_ctr_params->read_freq_threshold = query_cbk_args->defrag
- ->read_freq_threshold;
-
- ipc_ctr_params->query_limit = query_cbk_args->defrag->tier_conf.query_limit;
-
- ipc_ctr_params->emergency_demote = (!gfdb_brick_info->_gfdb_promote &&
- query_cbk_args->defrag->tier_conf
- .watermark_last == TIER_WM_HI);
-
- memcpy(&ipc_ctr_params->time_stamp, gfdb_brick_info->time_stamp,
- sizeof(gfdb_time_t));
-
- SET_DB_PARAM_TO_DICT(this->name, ctr_ipc_in_dict, GFDB_IPC_CTR_KEY,
- GFDB_IPC_CTR_QUERY_OPS, ret, out);
-
- SET_DB_PARAM_TO_DICT(this->name, ctr_ipc_in_dict,
- GFDB_IPC_CTR_GET_QFILE_PATH, local_brick->qfile_path,
- ret, out);
-
- ret = dict_set_bin(ctr_ipc_in_dict, GFDB_IPC_CTR_GET_QUERY_PARAMS,
- ipc_ctr_params, sizeof(*ipc_ctr_params));
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, LG_MSG_SET_PARAM_FAILED,
- "Failed setting %s to params dictionary",
- GFDB_IPC_CTR_GET_QUERY_PARAMS);
- GF_FREE(ipc_ctr_params);
- goto out;
- }
- ipc_ctr_params = NULL;
-
- ret = syncop_ipc(local_brick->xlator, GF_IPC_TARGET_CTR, ctr_ipc_in_dict,
- &ctr_ipc_out_dict);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_IPC_TIER_ERROR,
- "Failed query on %s ret %d", local_brick->brick_db_path, ret);
- goto out;
- }
-
- ret = dict_get_int32(ctr_ipc_out_dict, GFDB_IPC_CTR_RET_QUERY_COUNT,
- &count);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "Failed getting count "
- "of records on %s",
- local_brick->brick_db_path);
- goto out;
- }
-
- if (count < 0) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "Failed query on %s", local_brick->brick_db_path);
- ret = -1;
- goto out;
- }
-
- pthread_mutex_lock(&dm_stat_mutex);
- query_cbk_args->defrag->num_files_lookedup = count;
- pthread_mutex_unlock(&dm_stat_mutex);
-
- ret = 0;
-out:
-
- if (ctr_ipc_in_dict) {
- dict_unref(ctr_ipc_in_dict);
- ctr_ipc_in_dict = NULL;
- }
-
- if (ctr_ipc_out_dict) {
- dict_unref(ctr_ipc_out_dict);
- ctr_ipc_out_dict = NULL;
- }
-
- GF_FREE(ipc_ctr_params);
-
- return ret;
-}
-
-/* This is the call back function for each brick from hot/cold bricklist
- * It picks up each bricks db and queries for eligible files for migration.
- * The list of eligible files are populated in appropriate query files*/
-static int
-tier_process_brick(tier_brick_list_t *local_brick, void *args)
-{
- int ret = -1;
- dict_t *ctr_ipc_in_dict = NULL;
- dict_t *ctr_ipc_out_dict = NULL;
- char *strval = NULL;
-
- GF_VALIDATE_OR_GOTO("tier", local_brick, out);
-
- GF_VALIDATE_OR_GOTO("tier", local_brick->xlator, out);
-
- if (dht_tier_db_type == GFDB_SQLITE3) {
- /*Preparing ctr_ipc_in_dict*/
- ctr_ipc_in_dict = dict_new();
- if (!ctr_ipc_in_dict) {
- gf_msg("tier", GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "ctr_ipc_in_dict cannot initialized");
- goto out;
- }
-
- ret = dict_set_str(ctr_ipc_in_dict, GFDB_IPC_CTR_KEY,
- GFDB_IPC_CTR_GET_DB_PARAM_OPS);
- if (ret) {
- gf_msg("tier", GF_LOG_ERROR, 0, LG_MSG_SET_PARAM_FAILED,
- "Failed to set %s "
- "to params dictionary",
- GFDB_IPC_CTR_KEY);
- goto out;
- }
-
- ret = dict_set_str(ctr_ipc_in_dict, GFDB_IPC_CTR_GET_DB_PARAM_OPS, "");
- if (ret) {
- gf_msg("tier", GF_LOG_ERROR, 0, LG_MSG_SET_PARAM_FAILED,
- "Failed to set %s "
- "to params dictionary",
- GFDB_IPC_CTR_GET_DB_PARAM_OPS);
- goto out;
- }
-
- ret = dict_set_str(ctr_ipc_in_dict, GFDB_IPC_CTR_GET_DB_KEY,
- "journal_mode");
- if (ret) {
- gf_msg("tier", GF_LOG_ERROR, 0, LG_MSG_SET_PARAM_FAILED,
- "Failed to set %s "
- "to params dictionary",
- GFDB_IPC_CTR_GET_DB_KEY);
- goto out;
- }
-
- ret = syncop_ipc(local_brick->xlator, GF_IPC_TARGET_CTR,
- ctr_ipc_in_dict, &ctr_ipc_out_dict);
- if (ret || ctr_ipc_out_dict == NULL) {
- gf_msg("tier", GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "Failed to get "
- "journal_mode of sql db %s",
- local_brick->brick_db_path);
- goto out;
- }
-
- ret = dict_get_str(ctr_ipc_out_dict, "journal_mode", &strval);
- if (ret) {
- gf_msg("tier", GF_LOG_ERROR, 0, LG_MSG_GET_PARAM_FAILED,
- "Failed to get %s "
- "from params dictionary"
- "journal_mode",
- strval);
- goto out;
- }
-
- if (strval && (strncmp(strval, "wal", SLEN("wal")) == 0)) {
- ret = tier_process_self_query(local_brick, args);
- if (ret) {
- goto out;
- }
- } else {
- ret = tier_process_ctr_query(local_brick, args);
- if (ret) {
- goto out;
- }
- }
- ret = 0;
-
- } else {
- ret = tier_process_self_query(local_brick, args);
- if (ret) {
- goto out;
- }
- }
-
- ret = 0;
-out:
- if (ctr_ipc_in_dict)
- dict_unref(ctr_ipc_in_dict);
-
- if (ctr_ipc_out_dict)
- dict_unref(ctr_ipc_out_dict);
-
- return ret;
-}
-
-static int
-tier_build_migration_qfile(migration_args_t *args,
- query_cbk_args_t *query_cbk_args,
- gf_boolean_t is_promotion)
-{
- gfdb_time_t current_time;
- gfdb_brick_info_t gfdb_brick_info;
- gfdb_time_t time_in_past;
- int ret = -1;
- tier_brick_list_t *local_brick = NULL;
- int i = 0;
- time_in_past.tv_sec = args->freq_time;
- time_in_past.tv_usec = 0;
-
- ret = gettimeofday(&current_time, NULL);
- if (ret == -1) {
- gf_msg(args->this->name, GF_LOG_ERROR, errno,
- DHT_MSG_SYS_CALL_GET_TIME_FAILED, "Failed to get current time");
- goto out;
- }
- time_in_past.tv_sec = current_time.tv_sec - time_in_past.tv_sec;
-
- /* The migration daemon may run a varying numberof usec after the */
- /* sleep call triggers. A file may be registered in CTR some number */
- /* of usec X after the daemon started and missed in the subsequent */
- /* cycle if the daemon starts Y usec after the period in seconds */
- /* where Y>X. Normalize away this problem by always setting usec */
- /* to 0. */
- time_in_past.tv_usec = 0;
-
- gfdb_brick_info.time_stamp = &time_in_past;
- gfdb_brick_info._gfdb_promote = is_promotion;
- gfdb_brick_info._query_cbk_args = query_cbk_args;
-
- list_for_each_entry(local_brick, args->brick_list, list)
- {
- /* Construct query file path for this brick
- * i.e
- * /var/run/gluster/xlator_name/
- * {promote/demote}-brickname-indexinbricklist
- * So that no two query files will have same path even
- * bricks have the same name
- * */
- snprintf(local_brick->qfile_path, PATH_MAX, "%s-%s-%d",
- GET_QFILE_PATH(gfdb_brick_info._gfdb_promote),
- local_brick->brick_name, i);
-
- /* Delete any old query files for this brick */
- sys_unlink(local_brick->qfile_path);
-
- ret = tier_process_brick(local_brick, &gfdb_brick_info);
- if (ret) {
- gf_msg(args->this->name, GF_LOG_ERROR, 0,
- DHT_MSG_BRICK_QUERY_FAILED, "Brick %s query failed\n",
- local_brick->brick_db_path);
- }
- i++;
- }
- ret = 0;
-out:
- return ret;
-}
-
-static int
-tier_migrate_files_using_qfile(migration_args_t *comp,
- query_cbk_args_t *query_cbk_args)
-{
- int ret = -1;
- tier_brick_list_t *local_brick = NULL;
- tier_brick_list_t *temp = NULL;
- gfdb_time_t current_time = {
- 0,
- };
- ssize_t qfile_array_size = 0;
- int count = 0;
- int temp_fd = 0;
- gf_tier_conf_t *tier_conf = NULL;
-
- tier_conf = &(query_cbk_args->defrag->tier_conf);
-
- /* Time for error query files */
- gettimeofday(&current_time, NULL);
-
- /* Build the qfile list */
- list_for_each_entry_safe(local_brick, temp, comp->brick_list, list)
- {
- qfile_array_size++;
- }
- query_cbk_args->qfile_array = qfile_array_new(qfile_array_size);
- if (!query_cbk_args->qfile_array) {
- gf_msg("tier", GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "Failed to create new "
- "qfile_array");
- goto out;
- }
-
- /*Open all qfiles*/
- count = 0;
- query_cbk_args->qfile_array->exhausted_count = 0;
- list_for_each_entry_safe(local_brick, temp, comp->brick_list, list)
- {
- temp_fd = query_cbk_args->qfile_array->fd_array[count];
- temp_fd = open(local_brick->qfile_path, O_RDONLY,
- S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH);
- if (temp_fd < 0) {
- gf_msg("tier", GF_LOG_ERROR, errno, DHT_MSG_LOG_TIER_ERROR,
- "Failed to open "
- "%s to the query file",
- local_brick->qfile_path);
- query_cbk_args->qfile_array->exhausted_count++;
- }
- query_cbk_args->qfile_array->fd_array[count] = temp_fd;
- count++;
- }
-
- /* Moving the query file index to the next, so that we won't the same
- * query file every cycle as the first one */
- query_cbk_args->qfile_array
- ->next_index = (query_cbk_args->is_promotion)
- ? tier_conf->last_promote_qfile_index
- : tier_conf->last_demote_qfile_index;
- shift_next_index(query_cbk_args->qfile_array);
- if (query_cbk_args->is_promotion) {
- tier_conf->last_promote_qfile_index = query_cbk_args->qfile_array
- ->next_index;
- } else {
- tier_conf->last_demote_qfile_index = query_cbk_args->qfile_array
- ->next_index;
- }
-
- /* Migrate files using query file list */
- ret = tier_migrate_using_query_file((void *)query_cbk_args);
-out:
- qfile_array_free(query_cbk_args->qfile_array);
-
- /* If there is an error rename all the query files to .err files
- * with a timestamp for better debugging */
- if (ret) {
- struct tm tm = {
- 0,
- };
- char time_str[128] = {
- 0,
- };
- char query_file_path_err[PATH_MAX] = {
- 0,
- };
- int32_t len = 0;
-
- /* Time format for error query files */
- gmtime_r(&current_time.tv_sec, &tm);
- strftime(time_str, sizeof(time_str), "%F-%T", &tm);
-
- list_for_each_entry_safe(local_brick, temp, comp->brick_list, list)
- {
- /* rename error qfile*/
- len = snprintf(query_file_path_err, sizeof(query_file_path_err),
- "%s-%s.err", local_brick->qfile_path, time_str);
- if ((len >= 0) && (len < sizeof(query_file_path_err))) {
- if (sys_rename(local_brick->qfile_path, query_file_path_err) ==
- -1)
- gf_msg_debug("tier", 0,
- "rename "
- "failed");
- }
- }
- }
-
- query_cbk_args->qfile_array = NULL;
-
- return ret;
-}
-
-int
-tier_demote(migration_args_t *demotion_args)
-{
- query_cbk_args_t query_cbk_args;
- int ret = -1;
-
- GF_VALIDATE_OR_GOTO("tier", demotion_args, out);
- GF_VALIDATE_OR_GOTO("tier", demotion_args->this, out);
- GF_VALIDATE_OR_GOTO(demotion_args->this->name, demotion_args->brick_list,
- out);
- GF_VALIDATE_OR_GOTO(demotion_args->this->name, demotion_args->defrag, out);
-
- THIS = demotion_args->this;
-
- query_cbk_args.this = demotion_args->this;
- query_cbk_args.defrag = demotion_args->defrag;
- query_cbk_args.is_promotion = 0;
-
- /*Build the query file using bricklist*/
- ret = tier_build_migration_qfile(demotion_args, &query_cbk_args, _gf_false);
- if (ret)
- goto out;
-
- /* Migrate files using the query file */
- ret = tier_migrate_files_using_qfile(demotion_args, &query_cbk_args);
- if (ret)
- goto out;
-
-out:
- demotion_args->return_value = ret;
- return ret;
-}
-
-int
-tier_promote(migration_args_t *promotion_args)
-{
- int ret = -1;
- query_cbk_args_t query_cbk_args;
-
- GF_VALIDATE_OR_GOTO("tier", promotion_args->this, out);
- GF_VALIDATE_OR_GOTO(promotion_args->this->name, promotion_args->brick_list,
- out);
- GF_VALIDATE_OR_GOTO(promotion_args->this->name, promotion_args->defrag,
- out);
-
- THIS = promotion_args->this;
-
- query_cbk_args.this = promotion_args->this;
- query_cbk_args.defrag = promotion_args->defrag;
- query_cbk_args.is_promotion = 1;
-
- /*Build the query file using bricklist*/
- ret = tier_build_migration_qfile(promotion_args, &query_cbk_args, _gf_true);
- if (ret)
- goto out;
-
- /* Migrate files using the query file */
- ret = tier_migrate_files_using_qfile(promotion_args, &query_cbk_args);
- if (ret)
- goto out;
-
-out:
- promotion_args->return_value = ret;
- return ret;
-}
-
-/*
- * Command the CTR on a brick to compact the local database using an IPC
- */
-static int
-tier_process_self_compact(tier_brick_list_t *local_brick, void *args)
-{
- int ret = -1;
- char *db_path = NULL;
- query_cbk_args_t *query_cbk_args = NULL;
- xlator_t *this = NULL;
- gfdb_conn_node_t *conn_node = NULL;
- dict_t *params_dict = NULL;
- dict_t *ctr_ipc_dict = NULL;
- gfdb_brick_info_t *gfdb_brick_info = args;
-
- /*Init of all the essentials*/
- GF_VALIDATE_OR_GOTO("tier", gfdb_brick_info, out);
- query_cbk_args = gfdb_brick_info->_query_cbk_args;
-
- GF_VALIDATE_OR_GOTO("tier", query_cbk_args->this, out);
- this = query_cbk_args->this;
-
- GF_VALIDATE_OR_GOTO(this->name, gfdb_brick_info->_query_cbk_args, out);
-
- GF_VALIDATE_OR_GOTO(this->name, local_brick, out);
-
- GF_VALIDATE_OR_GOTO(this->name, local_brick->xlator, out);
-
- GF_VALIDATE_OR_GOTO(this->name, local_brick->brick_db_path, out);
-
- db_path = local_brick->brick_db_path;
-
- /*Preparing DB parameters before init_db i.e getting db connection*/
- params_dict = dict_new();
- if (!params_dict) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "DB Params cannot initialized");
- goto out;
- }
- SET_DB_PARAM_TO_DICT(this->name, params_dict,
- (char *)gfdb_methods.get_db_path_key(), db_path, ret,
- out);
-
- /*Get the db connection*/
- conn_node = gfdb_methods.init_db((void *)params_dict, dht_tier_db_type);
- if (!conn_node) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "FATAL: Failed initializing db operations");
- goto out;
- }
-
- ret = 0;
-
- /*Preparing ctr_ipc_dict*/
- ctr_ipc_dict = dict_new();
- if (!ctr_ipc_dict) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "ctr_ipc_dict cannot initialized");
- goto out;
- }
-
- ret = dict_set_int32(ctr_ipc_dict, "compact_active",
- query_cbk_args->defrag->tier_conf.compact_active);
-
- if (ret) {
- gf_msg("tier", GF_LOG_ERROR, 0, LG_MSG_SET_PARAM_FAILED,
- "Failed to set %s "
- "to params dictionary",
- "compact_active");
- goto out;
- }
-
- ret = dict_set_int32(
- ctr_ipc_dict, "compact_mode_switched",
- query_cbk_args->defrag->tier_conf.compact_mode_switched);
-
- if (ret) {
- gf_msg("tier", GF_LOG_ERROR, 0, LG_MSG_SET_PARAM_FAILED,
- "Failed to set %s "
- "to params dictionary",
- "compact_mode_switched");
- goto out;
- }
-
- SET_DB_PARAM_TO_DICT(this->name, ctr_ipc_dict, GFDB_IPC_CTR_KEY,
- GFDB_IPC_CTR_SET_COMPACT_PRAGMA, ret, out);
-
- gf_msg(this->name, GF_LOG_TRACE, 0, DHT_MSG_LOG_TIER_STATUS,
- "Starting Compaction IPC");
-
- ret = syncop_ipc(local_brick->xlator, GF_IPC_TARGET_CTR, ctr_ipc_dict,
- NULL);
-
- gf_msg(this->name, GF_LOG_TRACE, 0, DHT_MSG_LOG_TIER_STATUS,
- "Ending Compaction IPC");
-
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "Failed compaction "
- "on db %s error %d",
- local_brick->brick_db_path, ret);
- goto out;
- }
-
- gf_msg(this->name, GF_LOG_TRACE, 0, DHT_MSG_LOG_TIER_STATUS,
- "SUCCESS: %s Compaction", local_brick->brick_name);
-
- ret = 0;
-out:
- if (params_dict) {
- dict_unref(params_dict);
- params_dict = NULL;
- }
-
- if (ctr_ipc_dict) {
- dict_unref(ctr_ipc_dict);
- ctr_ipc_dict = NULL;
- }
-
- gfdb_methods.fini_db(conn_node);
-
- return ret;
-}
-
-/*
- * This is the call back function for each brick from hot/cold bricklist.
- * It determines the database type on each brick and calls the corresponding
- * function to prepare the compaction IPC.
- */
-static int
-tier_compact_db_brick(tier_brick_list_t *local_brick, void *args)
-{
- int ret = -1;
-
- GF_VALIDATE_OR_GOTO("tier", local_brick, out);
-
- GF_VALIDATE_OR_GOTO("tier", local_brick->xlator, out);
-
- ret = tier_process_self_compact(local_brick, args);
- if (ret) {
- gf_msg("tier", GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS,
- "Brick %s did not compact", local_brick->brick_name);
- goto out;
- }
-
- ret = 0;
-
-out:
-
- return ret;
-}
-
-static int
-tier_send_compact(migration_args_t *args, query_cbk_args_t *query_cbk_args)
-{
- gfdb_time_t current_time;
- gfdb_brick_info_t gfdb_brick_info;
- gfdb_time_t time_in_past;
- int ret = -1;
- tier_brick_list_t *local_brick = NULL;
-
- time_in_past.tv_sec = args->freq_time;
- time_in_past.tv_usec = 0;
-
- ret = gettimeofday(&current_time, NULL);
- if (ret == -1) {
- gf_msg(args->this->name, GF_LOG_ERROR, errno,
- DHT_MSG_SYS_CALL_GET_TIME_FAILED, "Failed to get current time");
- goto out;
- }
- time_in_past.tv_sec = current_time.tv_sec - time_in_past.tv_sec;
-
- /* The migration daemon may run a varying numberof usec after the sleep
- call triggers. A file may be registered in CTR some number of usec X
- after the daemon started and missed in the subsequent cycle if the
- daemon starts Y usec after the period in seconds where Y>X. Normalize
- away this problem by always setting usec to 0. */
- time_in_past.tv_usec = 0;
-
- gfdb_brick_info.time_stamp = &time_in_past;
-
- /* This is meant to say we are always compacting at this point */
- /* We simply borrow the promotion flag to do this */
- gfdb_brick_info._gfdb_promote = 1;
-
- gfdb_brick_info._query_cbk_args = query_cbk_args;
-
- list_for_each_entry(local_brick, args->brick_list, list)
- {
- gf_msg(args->this->name, GF_LOG_TRACE, 0, DHT_MSG_LOG_TIER_STATUS,
- "Start compaction for %s", local_brick->brick_name);
-
- ret = tier_compact_db_brick(local_brick, &gfdb_brick_info);
- if (ret) {
- gf_msg(args->this->name, GF_LOG_ERROR, 0,
- DHT_MSG_BRICK_QUERY_FAILED, "Brick %s compaction failed\n",
- local_brick->brick_db_path);
- }
-
- gf_msg(args->this->name, GF_LOG_TRACE, 0, DHT_MSG_LOG_TIER_STATUS,
- "End compaction for %s", local_brick->brick_name);
- }
- ret = 0;
-out:
- return ret;
-}
-
-static int
-tier_compact(void *args)
-{
- int ret = -1;
- query_cbk_args_t query_cbk_args;
- migration_args_t *compaction_args = args;
-
- GF_VALIDATE_OR_GOTO("tier", compaction_args->this, out);
- GF_VALIDATE_OR_GOTO(compaction_args->this->name,
- compaction_args->brick_list, out);
- GF_VALIDATE_OR_GOTO(compaction_args->this->name, compaction_args->defrag,
- out);
-
- THIS = compaction_args->this;
-
- query_cbk_args.this = compaction_args->this;
- query_cbk_args.defrag = compaction_args->defrag;
- query_cbk_args.is_compaction = 1;
-
- /* Send the compaction pragma out to all the bricks on the bricklist. */
- /* tier_get_bricklist ensures all bricks on the list are local to */
- /* this node. */
- ret = tier_send_compact(compaction_args, &query_cbk_args);
- if (ret)
- goto out;
-
- ret = 0;
-out:
- compaction_args->return_value = ret;
- return ret;
-}
-
-static int
-tier_get_bricklist(xlator_t *xl, struct list_head *local_bricklist_head)
-{
- xlator_list_t *child = NULL;
- char *rv = NULL;
- char *rh = NULL;
- char *brickname = NULL;
- char db_name[PATH_MAX] = "";
- int ret = 0;
- tier_brick_list_t *local_brick = NULL;
- int32_t len = 0;
-
- GF_VALIDATE_OR_GOTO("tier", xl, out);
- GF_VALIDATE_OR_GOTO("tier", local_bricklist_head, out);
-
- /*
- * This function obtains remote subvolumes and filters out only
- * those running on the same node as the tier daemon.
- */
- if (strcmp(xl->type, "protocol/client") == 0) {
- ret = dict_get_str(xl->options, "remote-host", &rh);
- if (ret < 0)
- goto out;
-
- if (gf_is_local_addr(rh)) {
- local_brick = GF_CALLOC(1, sizeof(tier_brick_list_t),
- gf_tier_mt_bricklist_t);
- if (!local_brick) {
- goto out;
- }
-
- ret = dict_get_str(xl->options, "remote-subvolume", &rv);
- if (ret < 0)
- goto out;
-
- brickname = strrchr(rv, '/') + 1;
- snprintf(db_name, sizeof(db_name), "%s.db", brickname);
-
- local_brick->brick_db_path = GF_MALLOC(PATH_MAX, gf_common_mt_char);
- if (!local_brick->brick_db_path) {
- gf_msg("tier", GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_STATUS,
- "Failed to allocate memory for"
- " bricklist.");
- ret = -1;
- goto out;
- }
-
- len = snprintf(local_brick->brick_db_path, PATH_MAX, "%s/%s/%s", rv,
- GF_HIDDEN_PATH, db_name);
- if ((len < 0) || (len >= PATH_MAX)) {
- gf_msg("tier", GF_LOG_ERROR, EINVAL, DHT_MSG_LOG_TIER_STATUS,
- "DB path too long");
- ret = -1;
- goto out;
- }
-
- local_brick->xlator = xl;
-
- snprintf(local_brick->brick_name, NAME_MAX, "%s", brickname);
-
- list_add_tail(&(local_brick->list), local_bricklist_head);
-
- ret = 0;
- goto out;
- }
- }
-
- for (child = xl->children; child; child = child->next) {
- ret = tier_get_bricklist(child->xlator, local_bricklist_head);
- if (ret) {
- goto out;
- }
- }
-
- ret = 0;
-out:
-
- if (ret) {
- if (local_brick) {
- GF_FREE(local_brick->brick_db_path);
- }
- GF_FREE(local_brick);
- }
-
- return ret;
-}
-
-int
-tier_get_freq_demote(gf_tier_conf_t *tier_conf)
-{
- if ((tier_conf->mode == TIER_MODE_WM) &&
- (tier_conf->watermark_last == TIER_WM_HI))
- return DEFAULT_DEMOTE_DEGRADED;
- else
- return tier_conf->tier_demote_frequency;
-}
-
-int
-tier_get_freq_promote(gf_tier_conf_t *tier_conf)
-{
- return tier_conf->tier_promote_frequency;
-}
-
-int
-tier_get_freq_compact_hot(gf_tier_conf_t *tier_conf)
-{
- return tier_conf->tier_compact_hot_frequency;
-}
-
-int
-tier_get_freq_compact_cold(gf_tier_conf_t *tier_conf)
-{
- return tier_conf->tier_compact_cold_frequency;
-}
-
-static int
-tier_check_demote(gfdb_time_t current_time, int freq)
-{
- return ((current_time.tv_sec % freq) == 0) ? _gf_true : _gf_false;
-}
-
-static gf_boolean_t
-tier_check_promote(gf_tier_conf_t *tier_conf, gfdb_time_t current_time,
- int freq)
-{
- if ((tier_conf->mode == TIER_MODE_WM) &&
- (tier_conf->watermark_last == TIER_WM_HI))
- return _gf_false;
-
- else
- return ((current_time.tv_sec % freq) == 0) ? _gf_true : _gf_false;
-}
-
-static gf_boolean_t
-tier_check_compact(gf_tier_conf_t *tier_conf, gfdb_time_t current_time,
- int freq_compact)
-{
- if (!(tier_conf->compact_active || tier_conf->compact_mode_switched))
- return _gf_false;
-
- return ((current_time.tv_sec % freq_compact) == 0) ? _gf_true : _gf_false;
-}
-
-void
-clear_bricklist(struct list_head *brick_list)
-{
- tier_brick_list_t *local_brick = NULL;
- tier_brick_list_t *temp = NULL;
-
- if (list_empty(brick_list)) {
- return;
- }
-
- list_for_each_entry_safe(local_brick, temp, brick_list, list)
- {
- list_del(&local_brick->list);
- GF_FREE(local_brick->brick_db_path);
- GF_FREE(local_brick);
- }
-}
-
-static void
-set_brick_list_qpath(struct list_head *brick_list, gf_boolean_t is_cold)
-{
- tier_brick_list_t *local_brick = NULL;
- int i = 0;
-
- GF_VALIDATE_OR_GOTO("tier", brick_list, out);
-
- list_for_each_entry(local_brick, brick_list, list)
- {
- /* Construct query file path for this brick
- * i.e
- * /var/run/gluster/xlator_name/
- * {promote/demote}-brickname-indexinbricklist
- * So that no two query files will have same path even
- * bricks have the same name
- * */
- snprintf(local_brick->qfile_path, PATH_MAX, "%s-%s-%d",
- GET_QFILE_PATH(is_cold), local_brick->brick_name, i);
- i++;
- }
-out:
- return;
-}
-
-static int
-tier_prepare_compact(migration_args_t *args, gfdb_time_t current_time)
-{
- xlator_t *this = NULL;
- dht_conf_t *conf = NULL;
- gf_defrag_info_t *defrag = NULL;
- gf_tier_conf_t *tier_conf = NULL;
- gf_boolean_t is_hot_tier = args->is_hot_tier;
- int freq = 0;
- int ret = -1;
- const char *tier_type = is_hot_tier ? "hot" : "cold";
-
- this = args->this;
-
- conf = this->private;
-
- defrag = conf->defrag;
-
- tier_conf = &defrag->tier_conf;
-
- freq = is_hot_tier ? tier_get_freq_compact_hot(tier_conf)
- : tier_get_freq_compact_cold(tier_conf);
-
- defrag->tier_conf.compact_mode_switched =
- is_hot_tier ? defrag->tier_conf.compact_mode_switched_hot
- : defrag->tier_conf.compact_mode_switched_cold;
-
- gf_msg(this->name, GF_LOG_TRACE, 0, DHT_MSG_LOG_TIER_STATUS,
- "Compact mode %i", defrag->tier_conf.compact_mode_switched);
-
- if (tier_check_compact(tier_conf, current_time, freq)) {
- gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS,
- "Start compaction on %s tier", tier_type);
-
- args->freq_time = freq;
- ret = tier_compact(args);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "Compaction failed on "
- "%s tier",
- tier_type);
- goto out;
- }
-
- gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS,
- "End compaction on %s tier", tier_type);
-
- if (is_hot_tier) {
- defrag->tier_conf.compact_mode_switched_hot = _gf_false;
- } else {
- defrag->tier_conf.compact_mode_switched_cold = _gf_false;
- }
- }
-
-out:
- return ret;
-}
-
-static int
-tier_get_wm_interval(tier_mode_t mode, tier_watermark_op_t wm)
-{
- if (mode == TIER_MODE_WM && wm == TIER_WM_HI)
- return WM_INTERVAL_EMERG;
-
- return WM_INTERVAL;
-}
-
-/*
- * Main tiering loop. This is called from the promotion and the
- * demotion threads spawned in tier_start().
- *
- * Every second, wake from sleep to perform tasks.
- * 1. Check trigger to migrate data.
- * 2. Check for state changes (pause, unpause, stop).
- */
-static void *
-tier_run(void *in_args)
-{
- dht_conf_t *conf = NULL;
- gfdb_time_t current_time = {0};
- int freq = 0;
- int ret = 0;
- xlator_t *any = NULL;
- xlator_t *xlator = NULL;
- gf_tier_conf_t *tier_conf = NULL;
- loc_t root_loc = {0};
- int check_watermark = 0;
- gf_defrag_info_t *defrag = NULL;
- xlator_t *this = NULL;
- migration_args_t *args = in_args;
- GF_VALIDATE_OR_GOTO("tier", args, out);
- GF_VALIDATE_OR_GOTO("tier", args->brick_list, out);
-
- this = args->this;
- GF_VALIDATE_OR_GOTO("tier", this, out);
-
- conf = this->private;
- GF_VALIDATE_OR_GOTO("tier", conf, out);
-
- defrag = conf->defrag;
- GF_VALIDATE_OR_GOTO("tier", defrag, out);
-
- if (list_empty(args->brick_list)) {
- gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_ERROR,
- "Brick list for tier is empty. Exiting.");
- goto out;
- }
-
- defrag->defrag_status = GF_DEFRAG_STATUS_STARTED;
- tier_conf = &defrag->tier_conf;
-
- dht_build_root_loc(defrag->root_inode, &root_loc);
-
- while (1) {
- /*
- * Check if a graph switch occurred. If so, stop migration
- * thread. It will need to be restarted manually.
- */
- any = THIS->ctx->active->first;
- xlator = xlator_search_by_name(any, this->name);
-
- if (xlator != this) {
- gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS,
- "Detected graph switch. Exiting migration "
- "daemon.");
- goto out;
- }
-
- gf_defrag_check_pause_tier(tier_conf);
-
- sleep(1);
-
- if (defrag->defrag_status != GF_DEFRAG_STATUS_STARTED) {
- ret = 1;
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "defrag->defrag_status != "
- "GF_DEFRAG_STATUS_STARTED");
- goto out;
- }
-
- if (defrag->cmd == GF_DEFRAG_CMD_START_DETACH_TIER ||
- defrag->cmd == GF_DEFRAG_CMD_DETACH_START) {
- ret = 0;
- defrag->defrag_status = GF_DEFRAG_STATUS_COMPLETE;
- gf_msg(this->name, GF_LOG_DEBUG, 0, DHT_MSG_LOG_TIER_ERROR,
- "defrag->defrag_cmd == "
- "GF_DEFRAG_CMD_START_DETACH_TIER");
- goto out;
- }
-
- if (gf_defrag_get_pause_state(&defrag->tier_conf) != TIER_RUNNING)
- continue;
-
- /* To have proper synchronization amongst all
- * brick holding nodes, so that promotion and demotions
- * start atomically w.r.t promotion/demotion frequency
- * period, all nodes should have their system time
- * in-sync with each other either manually set or
- * using a NTP server*/
- ret = gettimeofday(&current_time, NULL);
- if (ret == -1) {
- gf_msg(this->name, GF_LOG_ERROR, errno,
- DHT_MSG_SYS_CALL_GET_TIME_FAILED,
- "Failed to get current time");
- goto out;
- }
-
- check_watermark++;
-
- /* emergency demotion requires frequent watermark monitoring */
- if (check_watermark >=
- tier_get_wm_interval(tier_conf->mode, tier_conf->watermark_last)) {
- check_watermark = 0;
- if (tier_conf->mode == TIER_MODE_WM) {
- ret = tier_get_fs_stat(this, &root_loc);
- if (ret != 0) {
- continue;
- }
- ret = tier_check_watermark(this);
- if (ret != 0) {
- gf_msg(this->name, GF_LOG_CRITICAL, errno,
- DHT_MSG_LOG_TIER_ERROR, "Failed to get watermark");
- continue;
- }
- }
- }
-
- if (args->is_promotion) {
- freq = tier_get_freq_promote(tier_conf);
-
- if (tier_check_promote(tier_conf, current_time, freq)) {
- args->freq_time = freq;
- ret = tier_promote(args);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "Promotion failed");
- }
- }
- } else if (args->is_compaction) {
- tier_prepare_compact(args, current_time);
- } else {
- freq = tier_get_freq_demote(tier_conf);
-
- if (tier_check_demote(current_time, freq)) {
- args->freq_time = freq;
- ret = tier_demote(args);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "Demotion failed");
- }
- }
- }
-
- /* Check the statfs immediately after the processing threads
- return */
- check_watermark = WM_INTERVAL;
- }
-
- ret = 0;
-out:
-
- args->return_value = ret;
-
- return NULL;
-}
-
-int
-tier_start(xlator_t *this, gf_defrag_info_t *defrag)
-{
- pthread_t promote_thread;
- pthread_t demote_thread;
- pthread_t hot_compact_thread;
- pthread_t cold_compact_thread;
- int ret = -1;
- struct list_head bricklist_hot = {0};
- struct list_head bricklist_cold = {0};
- migration_args_t promotion_args = {0};
- migration_args_t demotion_args = {0};
- migration_args_t hot_compaction_args = {0};
- migration_args_t cold_compaction_args = {0};
- dht_conf_t *conf = NULL;
-
- INIT_LIST_HEAD((&bricklist_hot));
- INIT_LIST_HEAD((&bricklist_cold));
-
- conf = this->private;
-
- tier_get_bricklist(conf->subvolumes[1], &bricklist_hot);
- set_brick_list_qpath(&bricklist_hot, _gf_false);
-
- demotion_args.this = this;
- demotion_args.brick_list = &bricklist_hot;
- demotion_args.defrag = defrag;
- demotion_args.is_promotion = _gf_false;
- demotion_args.is_compaction = _gf_false;
-
- ret = gf_thread_create(&demote_thread, NULL, &tier_run, &demotion_args,
- "tierdem");
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "Failed to start demotion thread.");
- defrag->defrag_status = GF_DEFRAG_STATUS_FAILED;
- goto cleanup;
- }
-
- tier_get_bricklist(conf->subvolumes[0], &bricklist_cold);
- set_brick_list_qpath(&bricklist_cold, _gf_true);
-
- promotion_args.this = this;
- promotion_args.brick_list = &bricklist_cold;
- promotion_args.defrag = defrag;
- promotion_args.is_promotion = _gf_true;
-
- ret = gf_thread_create(&promote_thread, NULL, &tier_run, &promotion_args,
- "tierpro");
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "Failed to start promotion thread.");
- defrag->defrag_status = GF_DEFRAG_STATUS_FAILED;
- goto waitforspawned;
- }
-
- hot_compaction_args.this = this;
- hot_compaction_args.brick_list = &bricklist_hot;
- hot_compaction_args.defrag = defrag;
- hot_compaction_args.is_promotion = _gf_false;
- hot_compaction_args.is_compaction = _gf_true;
- hot_compaction_args.is_hot_tier = _gf_true;
-
- ret = gf_thread_create(&hot_compact_thread, NULL, &tier_run,
- &hot_compaction_args, "tierhcom");
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "Failed to start compaction thread.");
- defrag->defrag_status = GF_DEFRAG_STATUS_FAILED;
- goto waitforspawnedpromote;
- }
-
- cold_compaction_args.this = this;
- cold_compaction_args.brick_list = &bricklist_cold;
- cold_compaction_args.defrag = defrag;
- cold_compaction_args.is_promotion = _gf_false;
- cold_compaction_args.is_compaction = _gf_true;
- cold_compaction_args.is_hot_tier = _gf_false;
-
- ret = gf_thread_create(&cold_compact_thread, NULL, &tier_run,
- &cold_compaction_args, "tierccom");
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "Failed to start compaction thread.");
- defrag->defrag_status = GF_DEFRAG_STATUS_FAILED;
- goto waitforspawnedhotcompact;
- }
- pthread_join(cold_compact_thread, NULL);
-
-waitforspawnedhotcompact:
- pthread_join(hot_compact_thread, NULL);
-
-waitforspawnedpromote:
- pthread_join(promote_thread, NULL);
-
-waitforspawned:
- pthread_join(demote_thread, NULL);
-
-cleanup:
- clear_bricklist(&bricklist_cold);
- clear_bricklist(&bricklist_hot);
- return ret;
-}
-
-int32_t
-tier_migration_needed(xlator_t *this)
-{
- gf_defrag_info_t *defrag = NULL;
- dht_conf_t *conf = NULL;
- int ret = 0;
-
- conf = this->private;
-
- GF_VALIDATE_OR_GOTO(this->name, conf, out);
- GF_VALIDATE_OR_GOTO(this->name, conf->defrag, out);
-
- defrag = conf->defrag;
-
- if ((defrag->cmd == GF_DEFRAG_CMD_START_TIER) ||
- (defrag->cmd == GF_DEFRAG_CMD_START_DETACH_TIER))
- ret = 1;
-out:
- return ret;
-}
-
-int32_t
-tier_migration_get_dst(xlator_t *this, dht_local_t *local)
-{
- dht_conf_t *conf = NULL;
- int32_t ret = -1;
- gf_defrag_info_t *defrag = NULL;
-
- GF_VALIDATE_OR_GOTO("tier", this, out);
- GF_VALIDATE_OR_GOTO(this->name, this->private, out);
-
- conf = this->private;
-
- defrag = conf->defrag;
-
- if (defrag && defrag->cmd == GF_DEFRAG_CMD_START_DETACH_TIER) {
- local->rebalance.target_node = conf->subvolumes[0];
-
- } else if (conf->subvolumes[0] == local->cached_subvol)
- local->rebalance.target_node = conf->subvolumes[1];
- else
- local->rebalance.target_node = conf->subvolumes[0];
-
- if (local->rebalance.target_node)
- ret = 0;
-
-out:
- return ret;
-}
-
-xlator_t *
-tier_search(xlator_t *this, dht_layout_t *layout, const char *name)
-{
- xlator_t *subvol = NULL;
- dht_conf_t *conf = NULL;
-
- GF_VALIDATE_OR_GOTO("tier", this, out);
- GF_VALIDATE_OR_GOTO(this->name, this->private, out);
-
- conf = this->private;
-
- subvol = TIER_HASHED_SUBVOL;
-
-out:
- return subvol;
-}
-
-static int
-tier_load_externals(xlator_t *this)
-{
- int ret = -1;
- char *libpathfull = (LIBDIR "/libgfdb.so.0");
- get_gfdb_methods_t get_gfdb_methods;
-
- GF_VALIDATE_OR_GOTO("this", this, out);
-
- libhandle = dlopen(libpathfull, RTLD_NOW);
- if (!libhandle) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "Error loading libgfdb.so %s\n", dlerror());
- ret = -1;
- goto out;
- }
-
- get_gfdb_methods = dlsym(libhandle, "get_gfdb_methods");
- if (!get_gfdb_methods) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "Error loading get_gfdb_methods()");
- ret = -1;
- goto out;
- }
-
- get_gfdb_methods(&gfdb_methods);
-
- ret = 0;
-
-out:
- if (ret && libhandle)
- dlclose(libhandle);
-
- return ret;
-}
-
-static tier_mode_t
-tier_validate_mode(char *mode)
-{
- int ret = -1;
-
- if (strcmp(mode, "test") == 0) {
- ret = TIER_MODE_TEST;
- } else {
- ret = TIER_MODE_WM;
- }
-
- return ret;
-}
-
-static gf_boolean_t
-tier_validate_compact_mode(char *mode)
-{
- gf_boolean_t ret = _gf_false;
-
- gf_msg("tier", GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS,
- "tier_validate_compact_mode: mode = %s", mode);
-
- if (!strcmp(mode, "on")) {
- ret = _gf_true;
- } else {
- ret = _gf_false;
- }
-
- gf_msg("tier", GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_STATUS,
- "tier_validate_compact_mode: ret = %i", ret);
-
- return ret;
-}
-
-int
-tier_init_methods(xlator_t *this)
-{
- int ret = -1;
- dht_conf_t *conf = NULL;
- dht_methods_t *methods = NULL;
-
- GF_VALIDATE_OR_GOTO("tier", this, err);
-
- conf = this->private;
-
- methods = &(conf->methods);
-
- methods->migration_get_dst_subvol = tier_migration_get_dst;
- methods->migration_other = tier_start;
- methods->migration_needed = tier_migration_needed;
- methods->layout_search = tier_search;
-
- ret = 0;
-err:
- return ret;
-}
-
-static void
-tier_save_vol_name(xlator_t *this)
-{
- dht_conf_t *conf = NULL;
- gf_defrag_info_t *defrag = NULL;
- char *suffix = NULL;
- int name_len = 0;
-
- conf = this->private;
- defrag = conf->defrag;
-
- suffix = strstr(this->name, "-tier-dht");
-
- if (suffix)
- name_len = suffix - this->name;
- else
- name_len = strlen(this->name);
-
- if (name_len > GD_VOLUME_NAME_MAX)
- name_len = GD_VOLUME_NAME_MAX;
-
- strncpy(defrag->tier_conf.volname, this->name, name_len);
- defrag->tier_conf.volname[name_len] = 0;
-}
-
-int
-tier_init(xlator_t *this)
-{
- int ret = -1;
- int freq = 0;
- int maxsize = 0;
- dht_conf_t *conf = NULL;
- gf_defrag_info_t *defrag = NULL;
- char *voldir = NULL;
- char *mode = NULL;
- char *paused = NULL;
- tier_mode_t tier_mode = DEFAULT_TIER_MODE;
- gf_boolean_t compact_mode = _gf_false;
-
- ret = dht_init(this);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "tier_init failed");
- goto out;
- }
-
- conf = this->private;
-
- ret = tier_init_methods(this);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "tier_init_methods failed");
- goto out;
- }
-
- if (conf->subvolume_cnt != 2) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "Invalid number of subvolumes %d", conf->subvolume_cnt);
- goto out;
- }
-
- /* if instatiated from client side initialization is complete. */
- if (!conf->defrag) {
- ret = 0;
- goto out;
- }
-
- /* if instatiated from server side, load db libraries */
- ret = tier_load_externals(this);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "Could not load externals. Aborting");
- goto out;
- }
-
- defrag = conf->defrag;
-
- defrag->tier_conf.last_demote_qfile_index = 0;
- defrag->tier_conf.last_promote_qfile_index = 0;
-
- defrag->tier_conf.is_tier = 1;
- defrag->this = this;
-
- ret = dict_get_int32(this->options, "tier-max-promote-file-size", &maxsize);
- if (ret) {
- maxsize = 0;
- }
-
- defrag->tier_conf.tier_max_promote_size = maxsize;
-
- ret = dict_get_int32(this->options, "tier-promote-frequency", &freq);
- if (ret) {
- freq = DEFAULT_PROMOTE_FREQ_SEC;
- }
-
- defrag->tier_conf.tier_promote_frequency = freq;
-
- ret = dict_get_int32(this->options, "tier-demote-frequency", &freq);
- if (ret) {
- freq = DEFAULT_DEMOTE_FREQ_SEC;
- }
-
- defrag->tier_conf.tier_demote_frequency = freq;
-
- ret = dict_get_int32(this->options, "tier-hot-compact-frequency", &freq);
- if (ret) {
- freq = DEFAULT_HOT_COMPACT_FREQ_SEC;
- }
-
- defrag->tier_conf.tier_compact_hot_frequency = freq;
-
- ret = dict_get_int32(this->options, "tier-cold-compact-frequency", &freq);
- if (ret) {
- freq = DEFAULT_COLD_COMPACT_FREQ_SEC;
- }
-
- defrag->tier_conf.tier_compact_cold_frequency = freq;
-
- ret = dict_get_int32(this->options, "watermark-hi", &freq);
- if (ret) {
- freq = DEFAULT_WM_HI;
- }
-
- defrag->tier_conf.watermark_hi = freq;
-
- ret = dict_get_int32(this->options, "watermark-low", &freq);
- if (ret) {
- freq = DEFAULT_WM_LOW;
- }
-
- defrag->tier_conf.watermark_low = freq;
-
- ret = dict_get_int32(this->options, "write-freq-threshold", &freq);
- if (ret) {
- freq = DEFAULT_WRITE_FREQ_SEC;
- }
-
- defrag->write_freq_threshold = freq;
-
- ret = dict_get_int32(this->options, "read-freq-threshold", &freq);
- if (ret) {
- freq = DEFAULT_READ_FREQ_SEC;
- }
-
- defrag->read_freq_threshold = freq;
-
- ret = dict_get_int32(this->options, "tier-max-mb", &freq);
- if (ret) {
- freq = DEFAULT_TIER_MAX_MIGRATE_MB;
- }
-
- defrag->tier_conf.max_migrate_bytes = (uint64_t)freq * 1024 * 1024;
-
- ret = dict_get_int32(this->options, "tier-max-files", &freq);
- if (ret) {
- freq = DEFAULT_TIER_MAX_MIGRATE_FILES;
- }
-
- defrag->tier_conf.max_migrate_files = freq;
-
- ret = dict_get_int32(this->options, "tier-query-limit",
- &(defrag->tier_conf.query_limit));
- if (ret) {
- defrag->tier_conf.query_limit = DEFAULT_TIER_QUERY_LIMIT;
- }
-
- ret = dict_get_str(this->options, "tier-compact", &mode);
-
- if (ret) {
- defrag->tier_conf.compact_active = DEFAULT_COMP_MODE;
- } else {
- compact_mode = tier_validate_compact_mode(mode);
- /* If compaction is now active, we need to inform the bricks on
- the hot and cold tier of this. See dht-common.h for more. */
- defrag->tier_conf.compact_active = compact_mode;
- if (compact_mode) {
- defrag->tier_conf.compact_mode_switched_hot = _gf_true;
- defrag->tier_conf.compact_mode_switched_cold = _gf_true;
- }
- }
-
- ret = dict_get_str(this->options, "tier-mode", &mode);
- if (ret) {
- defrag->tier_conf.mode = DEFAULT_TIER_MODE;
- } else {
- tier_mode = tier_validate_mode(mode);
- defrag->tier_conf.mode = tier_mode;
- }
-
- pthread_mutex_init(&defrag->tier_conf.pause_mutex, 0);
-
- gf_defrag_set_pause_state(&defrag->tier_conf, TIER_RUNNING);
-
- ret = dict_get_str(this->options, "tier-pause", &paused);
-
- if (paused && strcmp(paused, "on") == 0)
- gf_defrag_set_pause_state(&defrag->tier_conf, TIER_REQUEST_PAUSE);
-
- ret = gf_asprintf(&voldir, "%s/%s", DEFAULT_VAR_RUN_DIRECTORY, this->name);
- if (ret < 0)
- goto out;
-
- ret = mkdir_p(voldir, 0777, _gf_true);
- if (ret == -1 && errno != EEXIST) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "tier_init failed");
-
- GF_FREE(voldir);
- goto out;
- }
-
- GF_FREE(voldir);
-
- ret = gf_asprintf(&promotion_qfile, "%s/%s/promote",
- DEFAULT_VAR_RUN_DIRECTORY, this->name);
- if (ret < 0)
- goto out;
-
- ret = gf_asprintf(&demotion_qfile, "%s/%s/demote",
- DEFAULT_VAR_RUN_DIRECTORY, this->name);
- if (ret < 0) {
- GF_FREE(promotion_qfile);
- goto out;
- }
-
- gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS,
- "Promote/demote frequency %d/%d "
- "Write/Read freq thresholds %d/%d",
- defrag->tier_conf.tier_promote_frequency,
- defrag->tier_conf.tier_demote_frequency,
- defrag->write_freq_threshold, defrag->read_freq_threshold);
-
- tier_save_vol_name(this);
-
- ret = 0;
-
-out:
-
- return ret;
-}
-
-int
-tier_cli_pause_done(int op_ret, call_frame_t *sync_frame, void *data)
-{
- gf_msg("tier", GF_LOG_INFO, 0, DHT_MSG_TIER_PAUSED,
- "Migrate file paused with op_ret %d", op_ret);
-
- return op_ret;
-}
-
-int
-tier_cli_pause(void *data)
-{
- gf_defrag_info_t *defrag = NULL;
- xlator_t *this = NULL;
- dht_conf_t *conf = NULL;
- int ret = -1;
-
- this = data;
-
- conf = this->private;
- GF_VALIDATE_OR_GOTO(this->name, conf, exit);
-
- defrag = conf->defrag;
- GF_VALIDATE_OR_GOTO(this->name, defrag, exit);
-
- gf_defrag_pause_tier(this, defrag);
-
- ret = 0;
-exit:
- return ret;
-}
-
-int
-tier_reconfigure(xlator_t *this, dict_t *options)
-{
- dht_conf_t *conf = NULL;
- gf_defrag_info_t *defrag = NULL;
- char *mode = NULL;
- int migrate_mb = 0;
- gf_boolean_t req_pause = _gf_false;
- int ret = 0;
- call_frame_t *frame = NULL;
- gf_boolean_t last_compact_setting = _gf_false;
-
- conf = this->private;
-
- if (conf->defrag) {
- defrag = conf->defrag;
- GF_OPTION_RECONF("tier-max-promote-file-size",
- defrag->tier_conf.tier_max_promote_size, options,
- int32, out);
-
- GF_OPTION_RECONF("tier-promote-frequency",
- defrag->tier_conf.tier_promote_frequency, options,
- int32, out);
-
- GF_OPTION_RECONF("tier-demote-frequency",
- defrag->tier_conf.tier_demote_frequency, options,
- int32, out);
-
- GF_OPTION_RECONF("write-freq-threshold", defrag->write_freq_threshold,
- options, int32, out);
-
- GF_OPTION_RECONF("read-freq-threshold", defrag->read_freq_threshold,
- options, int32, out);
-
- GF_OPTION_RECONF("watermark-hi", defrag->tier_conf.watermark_hi,
- options, int32, out);
-
- GF_OPTION_RECONF("watermark-low", defrag->tier_conf.watermark_low,
- options, int32, out);
-
- last_compact_setting = defrag->tier_conf.compact_active;
-
- GF_OPTION_RECONF("tier-compact", defrag->tier_conf.compact_active,
- options, bool, out);
-
- if (last_compact_setting != defrag->tier_conf.compact_active) {
- defrag->tier_conf.compact_mode_switched_hot = _gf_true;
- defrag->tier_conf.compact_mode_switched_cold = _gf_true;
- gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS,
- "compact mode switched");
- }
-
- GF_OPTION_RECONF("tier-hot-compact-frequency",
- defrag->tier_conf.tier_compact_hot_frequency, options,
- int32, out);
-
- GF_OPTION_RECONF("tier-cold-compact-frequency",
- defrag->tier_conf.tier_compact_cold_frequency, options,
- int32, out);
-
- GF_OPTION_RECONF("tier-mode", mode, options, str, out);
- defrag->tier_conf.mode = tier_validate_mode(mode);
-
- GF_OPTION_RECONF("tier-max-mb", migrate_mb, options, int32, out);
- defrag->tier_conf.max_migrate_bytes = (uint64_t)migrate_mb * 1024 *
- 1024;
-
- GF_OPTION_RECONF("tier-max-files", defrag->tier_conf.max_migrate_files,
- options, int32, out);
-
- GF_OPTION_RECONF("tier-query-limit", defrag->tier_conf.query_limit,
- options, int32, out);
-
- GF_OPTION_RECONF("tier-pause", req_pause, options, bool, out);
-
- if (req_pause == _gf_true) {
- frame = create_frame(this, this->ctx->pool);
- if (!frame)
- goto out;
-
- frame->root->pid = GF_CLIENT_PID_DEFRAG;
-
- ret = synctask_new(this->ctx->env, tier_cli_pause,
- tier_cli_pause_done, frame, this);
-
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "pause tier failed on reconfigure");
- }
- } else {
- ret = gf_defrag_resume_tier(this, defrag);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "resume tier failed on reconfigure");
- }
- }
- }
-
-out:
- return dht_reconfigure(this, options);
-}
-
-void
-tier_fini(xlator_t *this)
-{
- if (libhandle)
- dlclose(libhandle);
-
- GF_FREE(demotion_qfile);
- GF_FREE(promotion_qfile);
-
- dht_fini(this);
-}
-
-class_methods_t class_methods = {.init = tier_init,
- .fini = tier_fini,
- .reconfigure = tier_reconfigure,
- .notify = dht_notify};
-
-struct xlator_fops fops = {
-
- .lookup = dht_lookup,
- .create = tier_create,
- .mknod = dht_mknod,
-
- .open = dht_open,
- .statfs = tier_statfs,
- .opendir = dht_opendir,
- .readdir = tier_readdir,
- .readdirp = tier_readdirp,
- .fsyncdir = dht_fsyncdir,
- .symlink = dht_symlink,
- .unlink = tier_unlink,
- .link = tier_link,
- .mkdir = dht_mkdir,
- .rmdir = dht_rmdir,
- .rename = dht_rename,
- .entrylk = dht_entrylk,
- .fentrylk = dht_fentrylk,
-
- /* Inode read operations */
- .stat = dht_stat,
- .fstat = dht_fstat,
- .access = dht_access,
- .readlink = dht_readlink,
- .getxattr = dht_getxattr,
- .fgetxattr = dht_fgetxattr,
- .readv = dht_readv,
- .flush = dht_flush,
- .fsync = dht_fsync,
- .inodelk = dht_inodelk,
- .finodelk = dht_finodelk,
- .lk = dht_lk,
-
- /* Inode write operations */
- .fremovexattr = dht_fremovexattr,
- .removexattr = dht_removexattr,
- .setxattr = dht_setxattr,
- .fsetxattr = dht_fsetxattr,
- .truncate = dht_truncate,
- .ftruncate = dht_ftruncate,
- .writev = dht_writev,
- .xattrop = dht_xattrop,
- .fxattrop = dht_fxattrop,
- .setattr = dht_setattr,
- .fsetattr = dht_fsetattr,
- .fallocate = dht_fallocate,
- .discard = dht_discard,
- .zerofill = dht_zerofill,
-};
-
-struct xlator_cbks cbks = {.release = dht_release, .forget = dht_forget};
diff --git a/xlators/cluster/dht/src/tier.h b/xlators/cluster/dht/src/tier.h
deleted file mode 100644
index f0ffdfcd769..00000000000
--- a/xlators/cluster/dht/src/tier.h
+++ /dev/null
@@ -1,110 +0,0 @@
-/*
- Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-#ifndef _TIER_H_
-#define _TIER_H_
-
-/******************************************************************************/
-/* This is from dht-rebalancer.c as we don't have dht-rebalancer.h */
-#include "dht-common.h"
-#include "xlator.h"
-#include <signal.h>
-#include <fnmatch.h>
-#include <signal.h>
-
-/*
- * Size of timer wheel. We would not promote or demote less
- * frequently than this number.
- */
-#define TIMER_SECS 3600
-
-#include "gfdb_data_store.h"
-#include <ctype.h>
-#include <sys/stat.h>
-
-#define PROMOTION_QFILE "promotequeryfile"
-#define DEMOTION_QFILE "demotequeryfile"
-
-#define TIER_HASHED_SUBVOL conf->subvolumes[0]
-#define TIER_UNHASHED_SUBVOL conf->subvolumes[1]
-
-#define GET_QFILE_PATH(is_promotion) \
- (is_promotion) ? promotion_qfile : demotion_qfile
-
-typedef struct tier_qfile_array {
- int *fd_array;
- ssize_t array_size;
- ssize_t next_index;
- /* Indicate the number of exhuasted FDs*/
- ssize_t exhausted_count;
-} tier_qfile_array_t;
-
-typedef struct _query_cbk_args {
- xlator_t *this;
- gf_defrag_info_t *defrag;
- /* This is write */
- int query_fd;
- int is_promotion;
- int is_compaction;
- /* This is for read */
- tier_qfile_array_t *qfile_array;
-} query_cbk_args_t;
-
-int
-gf_run_tier(xlator_t *this, gf_defrag_info_t *defrag);
-
-typedef struct gfdb_brick_info {
- gfdb_time_t *time_stamp;
- gf_boolean_t _gfdb_promote;
- query_cbk_args_t *_query_cbk_args;
-} gfdb_brick_info_t;
-
-typedef struct brick_list {
- xlator_t *xlator;
- char *brick_db_path;
- char brick_name[NAME_MAX];
- char qfile_path[PATH_MAX];
- struct list_head list;
-} tier_brick_list_t;
-
-typedef struct _dm_thread_args {
- xlator_t *this;
- gf_defrag_info_t *defrag;
- struct list_head *brick_list;
- int freq_time;
- int return_value;
- int is_promotion;
- int is_compaction;
- gf_boolean_t is_hot_tier;
-} migration_args_t;
-
-typedef enum tier_watermark_op_ {
- TIER_WM_NONE = 0,
- TIER_WM_LOW,
- TIER_WM_HI,
- TIER_WM_MID
-} tier_watermark_op_t;
-
-#define DEFAULT_PROMOTE_FREQ_SEC 120
-#define DEFAULT_DEMOTE_FREQ_SEC 120
-#define DEFAULT_HOT_COMPACT_FREQ_SEC 604800
-#define DEFAULT_COLD_COMPACT_FREQ_SEC 604800
-#define DEFAULT_DEMOTE_DEGRADED 1
-#define DEFAULT_WRITE_FREQ_SEC 0
-#define DEFAULT_READ_FREQ_SEC 0
-#define DEFAULT_WM_LOW 75
-#define DEFAULT_WM_HI 90
-#define DEFAULT_TIER_MODE TIER_MODE_TEST
-#define DEFAULT_COMP_MODE _gf_true
-#define DEFAULT_TIER_MAX_MIGRATE_MB 1000
-#define DEFAULT_TIER_MAX_MIGRATE_FILES 5000
-#define DEFAULT_TIER_QUERY_LIMIT 100
-
-#endif
diff --git a/xlators/cluster/dht/src/tier.sym b/xlators/cluster/dht/src/tier.sym
deleted file mode 100644
index 60205d145b6..00000000000
--- a/xlators/cluster/dht/src/tier.sym
+++ /dev/null
@@ -1,9 +0,0 @@
-fops
-cbks
-class_methods
-dht_methods
-tier_methods
-options
-mem_acct_init
-reconfigure
-dumpops
diff --git a/xlators/cluster/dht/src/unittest/dht_layout_mock.c b/xlators/cluster/dht/src/unittest/dht_layout_mock.c
index 49bf18b9fe6..771452963d1 100644
--- a/xlators/cluster/dht/src/unittest/dht_layout_mock.c
+++ b/xlators/cluster/dht/src/unittest/dht_layout_mock.c
@@ -7,10 +7,10 @@
later), or the GNU General Public License, version 2 (GPLv2), in all
cases as published by the Free Software Foundation.
*/
-#include "glusterfs.h"
-#include "xlator.h"
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/xlator.h>
#include "dht-common.h"
-#include "byte-order.h"
+#include <glusterfs/byte-order.h>
int
dht_hash_compute(xlator_t *this, int type, const char *name, uint32_t *hash_p)
diff --git a/xlators/cluster/dht/src/unittest/dht_layout_unittest.c b/xlators/cluster/dht/src/unittest/dht_layout_unittest.c
index 72890070835..c94a1d0a2e1 100644
--- a/xlators/cluster/dht/src/unittest/dht_layout_unittest.c
+++ b/xlators/cluster/dht/src/unittest/dht_layout_unittest.c
@@ -9,8 +9,8 @@
*/
#include "dht-common.h"
-#include "logging.h"
-#include "xlator.h"
+#include <glusterfs/logging.h>
+#include <glusterfs/xlator.h>
#include <inttypes.h>
#include <stdarg.h>
diff --git a/xlators/cluster/ec/src/ec-code.c b/xlators/cluster/ec/src/ec-code.c
index b3338248569..03162ae05a9 100644
--- a/xlators/cluster/ec/src/ec-code.c
+++ b/xlators/cluster/ec/src/ec-code.c
@@ -14,7 +14,7 @@
#include <sys/stat.h>
#include <ctype.h>
-#include "syscall.h"
+#include <glusterfs/syscall.h>
#include "ec-mem-types.h"
#include "ec-code.h"
diff --git a/xlators/cluster/ec/src/ec-code.h b/xlators/cluster/ec/src/ec-code.h
index 355209c3944..75fb35d93e3 100644
--- a/xlators/cluster/ec/src/ec-code.h
+++ b/xlators/cluster/ec/src/ec-code.h
@@ -11,8 +11,8 @@
#ifndef __EC_CODE_H__
#define __EC_CODE_H__
-#include "xlator.h"
-#include "list.h"
+#include <glusterfs/xlator.h>
+#include <glusterfs/list.h>
#include "ec-types.h"
#include "ec-galois.h"
diff --git a/xlators/cluster/ec/src/ec-combine.c b/xlators/cluster/ec/src/ec-combine.c
index 454c2c72c95..703a30e2485 100644
--- a/xlators/cluster/ec/src/ec-combine.c
+++ b/xlators/cluster/ec/src/ec-combine.c
@@ -11,14 +11,14 @@
#include <fnmatch.h>
#include "libxlator.h"
-#include "byte-order.h"
+#include <glusterfs/byte-order.h>
#include "ec-types.h"
#include "ec-helpers.h"
#include "ec-common.h"
#include "ec-combine.h"
#include "ec-messages.h"
-#include "quota-common-utils.h"
+#include <glusterfs/quota-common-utils.h>
#define EC_QUOTA_PREFIX "trusted.glusterfs.quota."
@@ -179,13 +179,14 @@ ec_iatt_combine(ec_fop_data_t *fop, struct iatt *dst, struct iatt *src,
"links: %u-%u, uid: %u-%u, gid: %u-%u, "
"rdev: %" PRIu64 "-%" PRIu64 ", size: %" PRIu64 "-%" PRIu64
", "
- "mode: %o-%o)",
+ "mode: %o-%o), %s",
dst[i].ia_ino, src[i].ia_ino, dst[i].ia_nlink,
src[i].ia_nlink, dst[i].ia_uid, src[i].ia_uid, dst[i].ia_gid,
src[i].ia_gid, dst[i].ia_rdev, src[i].ia_rdev,
dst[i].ia_size, src[i].ia_size,
st_mode_from_ia(dst[i].ia_prot, dst[i].ia_type),
- st_mode_from_ia(src[i].ia_prot, dst[i].ia_type));
+ st_mode_from_ia(src[i].ia_prot, dst[i].ia_type),
+ ec_msg_str(fop));
return 0;
}
@@ -342,9 +343,8 @@ out:
}
static int32_t
-ec_dict_data_concat(const char *fmt, ec_cbk_data_t *cbk, int32_t which,
- char *key, char *new_key, const char *def,
- gf_boolean_t global, ...)
+ec_dict_data_concat(ec_cbk_data_t *cbk, int32_t which, char *key, char *new_key,
+ const char *def, gf_boolean_t global, const char *fmt, ...)
{
ec_t *ec = cbk->fop->xl->private;
data_t *data[ec->nodes];
@@ -356,7 +356,7 @@ ec_dict_data_concat(const char *fmt, ec_cbk_data_t *cbk, int32_t which,
ec_dict_list(data, cbk, which, key, global);
- va_start(args, global);
+ va_start(args, fmt);
err = ec_concat_prepare(cbk->fop->xl, &pre, &sep, &post, fmt, args);
va_end(args);
@@ -485,22 +485,12 @@ ec_dict_data_merge(ec_cbk_data_t *cbk, int32_t which, char *key)
tmp = NULL;
- len = dict_serialized_length(lockinfo);
- if (len < 0) {
- err = len;
-
- goto out;
- }
- ptr = GF_MALLOC(len, gf_common_mt_char);
- if (ptr == NULL) {
- err = -ENOMEM;
-
- goto out;
- }
- err = dict_serialize(lockinfo, ptr);
+ err = dict_allocate_and_serialize(lockinfo, (char **)&ptr,
+ (unsigned int *)&len);
if (err != 0) {
goto out;
}
+
dict = (which == EC_COMBINE_XDATA) ? cbk->xdata : cbk->dict;
err = dict_set_dynptr(dict, key, ptr, len);
if (err != 0) {
@@ -739,14 +729,14 @@ ec_dict_data_combine(dict_t *dict, char *key, data_t *value, void *arg)
if ((strcmp(key, GF_XATTR_PATHINFO_KEY) == 0) ||
(strcmp(key, GF_XATTR_USER_PATHINFO_KEY) == 0)) {
- return ec_dict_data_concat("(<EC:%s> { })", data->cbk, data->which, key,
- NULL, NULL, _gf_false,
+ return ec_dict_data_concat(data->cbk, data->which, key, NULL, NULL,
+ _gf_false, _gf_false, "(<EC:%s> { })",
data->cbk->fop->xl->name);
}
if (strncmp(key, GF_XATTR_CLRLK_CMD, SLEN(GF_XATTR_CLRLK_CMD)) == 0) {
- return ec_dict_data_concat("{\n}", data->cbk, data->which, key, NULL,
- NULL, _gf_false);
+ return ec_dict_data_concat(data->cbk, data->which, key, NULL, NULL,
+ _gf_false, "{\n}");
}
if (strncmp(key, GF_XATTR_LOCKINFO_KEY, SLEN(GF_XATTR_LOCKINFO_KEY)) == 0) {
@@ -776,9 +766,9 @@ ec_dict_data_combine(dict_t *dict, char *key, data_t *value, void *arg)
if (XATTR_IS_NODE_UUID(key)) {
if (data->cbk->fop->int32) {
/* List of node uuid is requested */
- return ec_dict_data_concat("{ }", data->cbk, data->which, key,
+ return ec_dict_data_concat(data->cbk, data->which, key,
GF_XATTR_LIST_NODE_UUIDS_KEY, UUID0_STR,
- _gf_true);
+ _gf_true, "{ }");
} else {
return ec_dict_data_uuid(data->cbk, data->which, key);
}
diff --git a/xlators/cluster/ec/src/ec-common.c b/xlators/cluster/ec/src/ec-common.c
index 737f7fda882..b955efd8c2d 100644
--- a/xlators/cluster/ec/src/ec-common.c
+++ b/xlators/cluster/ec/src/ec-common.c
@@ -8,8 +8,8 @@
cases as published by the Free Software Foundation.
*/
-#include "byte-order.h"
-#include "hashfn.h"
+#include <glusterfs/byte-order.h>
+#include <glusterfs/hashfn.h>
#include "ec-mem-types.h"
#include "ec-types.h"
@@ -44,16 +44,16 @@ ec_update_fd_status(fd_t *fd, xlator_t *xl, int idx, int32_t ret_status)
UNLOCK(&fd->lock);
}
-static int
-ec_fd_ctx_need_open(fd_t *fd, xlator_t *this, uintptr_t *need_open)
+static uintptr_t
+ec_fd_ctx_need_open(fd_t *fd, xlator_t *this, uintptr_t mask)
{
int i = 0;
int count = 0;
ec_t *ec = NULL;
ec_fd_t *fd_ctx = NULL;
+ uintptr_t need_open = 0;
ec = this->private;
- *need_open = 0;
fd_ctx = ec_fd_get(fd, this);
if (!fd_ctx)
@@ -63,9 +63,9 @@ ec_fd_ctx_need_open(fd_t *fd, xlator_t *this, uintptr_t *need_open)
{
for (i = 0; i < ec->nodes; i++) {
if ((fd_ctx->fd_status[i] == EC_FD_NOT_OPENED) &&
- (ec->xl_up & (1 << i))) {
+ ((ec->xl_up & (1 << i)) != 0) && ((mask & (1 << i)) != 0)) {
fd_ctx->fd_status[i] = EC_FD_OPENING;
- *need_open |= (1 << i);
+ need_open |= (1 << i);
count++;
}
}
@@ -76,10 +76,11 @@ ec_fd_ctx_need_open(fd_t *fd, xlator_t *this, uintptr_t *need_open)
* then ignore fixing the fd as it has been
* requested from heal operation.
*/
- if (count >= ec->fragments)
- count = 0;
+ if (count >= ec->fragments) {
+ need_open = 0;
+ }
- return count;
+ return need_open;
}
static gf_boolean_t
@@ -96,11 +97,11 @@ ec_is_fd_fixable(fd_t *fd)
}
static void
-ec_fix_open(ec_fop_data_t *fop)
+ec_fix_open(ec_fop_data_t *fop, uintptr_t mask)
{
- int call_count = 0;
uintptr_t need_open = 0;
int ret = 0;
+ int32_t flags = 0;
loc_t loc = {
0,
};
@@ -109,9 +110,10 @@ ec_fix_open(ec_fop_data_t *fop)
goto out;
/* Evaluate how many remote fd's to be opened */
- call_count = ec_fd_ctx_need_open(fop->fd, fop->xl, &need_open);
- if (!call_count)
+ need_open = ec_fd_ctx_need_open(fop->fd, fop->xl, mask);
+ if (need_open == 0) {
goto out;
+ }
loc.inode = inode_ref(fop->fd->inode);
gf_uuid_copy(loc.gfid, fop->fd->inode->gfid);
@@ -120,12 +122,15 @@ ec_fix_open(ec_fop_data_t *fop)
goto out;
}
+ flags = fop->fd->flags & (~(O_TRUNC | O_APPEND | O_CREAT | O_EXCL));
if (IA_IFDIR == fop->fd->inode->ia_type) {
- ec_opendir(fop->frame, fop->xl, need_open, EC_MINIMUM_ONE, NULL, NULL,
+ ec_opendir(fop->frame, fop->xl, need_open,
+ EC_MINIMUM_ONE | EC_FOP_NO_PROPAGATE_ERROR, NULL, NULL,
&fop->loc[0], fop->fd, NULL);
} else {
- ec_open(fop->frame, fop->xl, need_open, EC_MINIMUM_ONE, NULL, NULL,
- &loc, fop->fd->flags, fop->fd, NULL);
+ ec_open(fop->frame, fop->xl, need_open,
+ EC_MINIMUM_ONE | EC_FOP_NO_PROPAGATE_ERROR, NULL, NULL, &loc,
+ flags, fop->fd, NULL);
}
out:
@@ -225,7 +230,7 @@ ec_child_next(ec_t *ec, ec_fop_data_t *fop, uint32_t idx)
int32_t
ec_heal_report(call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, uintptr_t mask, uintptr_t good,
- uintptr_t bad, dict_t *xdata)
+ uintptr_t bad, uint32_t pending, dict_t *xdata)
{
if (op_ret < 0) {
gf_msg(this->name, GF_LOG_DEBUG, op_errno, EC_MSG_HEAL_FAIL,
@@ -311,16 +316,19 @@ ec_check_status(ec_fop_data_t *fop)
}
}
- gf_msg(fop->xl->name, GF_LOG_WARNING, 0, EC_MSG_OP_FAIL_ON_SUBVOLS,
- "Operation failed on %d of %d subvolumes.(up=%s, mask=%s, "
- "remaining=%s, good=%s, bad=%s)",
- gf_bits_count(ec->xl_up & ~(fop->remaining | fop->good)), ec->nodes,
- ec_bin(str1, sizeof(str1), ec->xl_up, ec->nodes),
- ec_bin(str2, sizeof(str2), fop->mask, ec->nodes),
- ec_bin(str3, sizeof(str3), fop->remaining, ec->nodes),
- ec_bin(str4, sizeof(str4), fop->good, ec->nodes),
- ec_bin(str5, sizeof(str5), ec->xl_up & ~(fop->remaining | fop->good),
- ec->nodes));
+ gf_msg(
+ fop->xl->name, GF_LOG_WARNING, 0, EC_MSG_OP_FAIL_ON_SUBVOLS,
+ "Operation failed on %d of %d subvolumes.(up=%s, mask=%s, "
+ "remaining=%s, good=%s, bad=%s,"
+ "(Least significant bit represents first client/brick of subvol), %s)",
+ gf_bits_count(ec->xl_up & ~(fop->remaining | fop->good)), ec->nodes,
+ ec_bin(str1, sizeof(str1), ec->xl_up, ec->nodes),
+ ec_bin(str2, sizeof(str2), fop->mask, ec->nodes),
+ ec_bin(str3, sizeof(str3), fop->remaining, ec->nodes),
+ ec_bin(str4, sizeof(str4), fop->good, ec->nodes),
+ ec_bin(str5, sizeof(str5), ec->xl_up & ~(fop->remaining | fop->good),
+ ec->nodes),
+ ec_msg_str(fop));
if (fop->use_fd) {
if (fop->fd != NULL) {
ec_fheal(NULL, fop->xl, -1, EC_MINIMUM_ONE, ec_heal_report, NULL,
@@ -494,12 +502,16 @@ ec_resume(ec_fop_data_t *fop, int32_t error)
}
void
-ec_resume_parent(ec_fop_data_t *fop, int32_t error)
+ec_resume_parent(ec_fop_data_t *fop)
{
ec_fop_data_t *parent;
+ int32_t error = 0;
parent = fop->parent;
if (parent != NULL) {
+ if ((fop->fop_flags & EC_FOP_NO_PROPAGATE_ERROR) == 0) {
+ error = fop->error;
+ }
ec_trace("RESUME_PARENT", fop, "error=%u", error);
fop->parent = NULL;
ec_resume(parent, error);
@@ -592,6 +604,8 @@ ec_internal_op(ec_fop_data_t *fop)
return _gf_true;
if (fop->id == GF_FOP_FXATTROP)
return _gf_true;
+ if (fop->id == GF_FOP_OPEN)
+ return _gf_true;
return _gf_false;
}
@@ -602,10 +616,10 @@ ec_msg_str(ec_fop_data_t *fop)
loc_t *loc2 = NULL;
char gfid1[64] = {0};
char gfid2[64] = {0};
+ ec_fop_data_t *parent = fop->parent;
if (fop->errstr)
return fop->errstr;
-
if (!fop->use_fd) {
loc1 = &fop->loc[0];
loc2 = &fop->loc[1];
@@ -613,24 +627,46 @@ ec_msg_str(ec_fop_data_t *fop)
if (fop->id == GF_FOP_RENAME) {
gf_asprintf(&fop->errstr,
"FOP : '%s' failed on '%s' and '%s' with gfids "
- "%s and %s respectively",
+ "%s and %s respectively. Parent FOP: %s",
ec_fop_name(fop->id), loc1->path, loc2->path,
uuid_utoa_r(loc1->gfid, gfid1),
- uuid_utoa_r(loc2->gfid, gfid2));
+ uuid_utoa_r(loc2->gfid, gfid2),
+ parent ? ec_fop_name(parent->id) : "No Parent");
} else {
- gf_asprintf(&fop->errstr, "FOP : '%s' failed on '%s' with gfid %s",
- ec_fop_name(fop->id), loc1->path,
- uuid_utoa_r(loc1->gfid, gfid1));
+ gf_asprintf(
+ &fop->errstr,
+ "FOP : '%s' failed on '%s' with gfid %s. Parent FOP: %s",
+ ec_fop_name(fop->id), loc1->path,
+ uuid_utoa_r(loc1->gfid, gfid1),
+ parent ? ec_fop_name(parent->id) : "No Parent");
}
} else {
- gf_asprintf(&fop->errstr, "FOP : '%s' failed on gfid %s",
- ec_fop_name(fop->id),
- uuid_utoa_r(fop->fd->inode->gfid, gfid1));
+ gf_asprintf(
+ &fop->errstr, "FOP : '%s' failed on gfid %s. Parent FOP: %s",
+ ec_fop_name(fop->id), uuid_utoa_r(fop->fd->inode->gfid, gfid1),
+ parent ? ec_fop_name(parent->id) : "No Parent");
}
return fop->errstr;
}
-int32_t
+static void
+ec_log_insufficient_vol(ec_fop_data_t *fop, int32_t have, uint32_t need,
+ int32_t loglevel)
+{
+ ec_t *ec = fop->xl->private;
+ char str1[32], str2[32], str3[32];
+
+ gf_msg(ec->xl->name, loglevel, 0, EC_MSG_CHILDS_INSUFFICIENT,
+ "Insufficient available children for this request: "
+ "Have : %d, Need : %u : Child UP : %s "
+ "Mask: %s, Healing : %s : %s ",
+ have, need, ec_bin(str1, sizeof(str1), ec->xl_up, ec->nodes),
+ ec_bin(str2, sizeof(str2), fop->mask, ec->nodes),
+ ec_bin(str3, sizeof(str3), fop->healing, ec->nodes),
+ ec_msg_str(fop));
+}
+
+static int32_t
ec_child_select(ec_fop_data_t *fop)
{
ec_t *ec = fop->xl->private;
@@ -644,6 +680,9 @@ ec_child_select(ec_fop_data_t *fop)
* unlock should go on all subvols where lock is performed*/
if (fop->parent && !ec_internal_op(fop)) {
fop->mask &= (fop->parent->mask & ~fop->parent->healing);
+ if (ec_is_data_fop(fop->id)) {
+ fop->healing |= fop->parent->healing;
+ }
}
if ((fop->mask & ~ec->xl_up) != 0) {
@@ -684,15 +723,18 @@ ec_child_select(ec_fop_data_t *fop)
ec_trace("SELECT", fop, "");
if ((num < fop->minimum) && (num < ec->fragments)) {
- gf_msg(ec->xl->name, GF_LOG_ERROR, 0, EC_MSG_CHILDS_INSUFFICIENT,
- "Insufficient available children "
- "for this request (have %d, need "
- "%d). %s",
- num, fop->minimum, ec_msg_str(fop));
+ ec_log_insufficient_vol(fop, num, fop->minimum, GF_LOG_ERROR);
return 0;
}
- ec_sleep(fop);
+ if (!fop->parent && fop->lock_count &&
+ (fop->locks[0].update[EC_DATA_TXN] ||
+ fop->locks[0].update[EC_METADATA_TXN])) {
+ if (ec->quorum_count && (num < ec->quorum_count)) {
+ ec_log_insufficient_vol(fop, num, ec->quorum_count, GF_LOG_ERROR);
+ return 0;
+ }
+ }
return 1;
}
@@ -772,6 +814,8 @@ ec_dispatch_one(ec_fop_data_t *fop)
ec_dispatch_start(fop);
if (ec_child_select(fop)) {
+ ec_sleep(fop);
+
fop->expected = 1;
fop->first = ec_select_first_by_read_policy(fop->xl->private, fop);
@@ -806,6 +850,8 @@ ec_dispatch_inc(ec_fop_data_t *fop)
ec_dispatch_start(fop);
if (ec_child_select(fop)) {
+ ec_sleep(fop);
+
fop->expected = gf_bits_count(fop->remaining);
fop->first = 0;
@@ -819,6 +865,8 @@ ec_dispatch_all(ec_fop_data_t *fop)
ec_dispatch_start(fop);
if (ec_child_select(fop)) {
+ ec_sleep(fop);
+
fop->expected = gf_bits_count(fop->remaining);
fop->first = 0;
@@ -837,6 +885,8 @@ ec_dispatch_min(ec_fop_data_t *fop)
ec_dispatch_start(fop);
if (ec_child_select(fop)) {
+ ec_sleep(fop);
+
fop->expected = count = ec->fragments;
fop->first = ec_select_first_by_read_policy(fop->xl->private, fop);
idx = fop->first - 1;
@@ -851,6 +901,23 @@ ec_dispatch_min(ec_fop_data_t *fop)
}
}
+void
+ec_succeed_all(ec_fop_data_t *fop)
+{
+ ec_dispatch_start(fop);
+
+ if (ec_child_select(fop)) {
+ fop->expected = gf_bits_count(fop->remaining);
+ fop->first = 0;
+
+ /* Simulate a successful execution on all bricks */
+ ec_trace("SUCCEED", fop, "");
+
+ fop->good = fop->remaining;
+ fop->remaining = 0;
+ }
+}
+
ec_lock_t *
ec_lock_allocate(ec_fop_data_t *fop, loc_t *loc)
{
@@ -1372,27 +1439,28 @@ ec_get_size_version(ec_lock_link_t *link)
!ec_is_data_fop(fop->id))
link->optimistic_changelog = _gf_true;
+ memset(&loc, 0, sizeof(loc));
+
+ LOCK(&lock->loc.inode->lock);
+
set_dirty = ec_set_dirty_flag(link, ctx, dirty);
/* If ec metadata has already been retrieved, do not try again. */
- if (ctx->have_info && (!set_dirty)) {
+ if (ctx->have_info) {
if (ec_is_data_fop(fop->id)) {
fop->healing |= lock->healing;
}
- return;
+ if (!set_dirty)
+ goto unlock;
}
/* Determine if there's something we need to retrieve for the current
* operation. */
if (!set_dirty && !lock->query && (lock->loc.inode->ia_type != IA_IFREG) &&
(lock->loc.inode->ia_type != IA_INVAL)) {
- return;
+ goto unlock;
}
- memset(&loc, 0, sizeof(loc));
-
- LOCK(&lock->loc.inode->lock);
-
changed_flags = ec_set_xattrop_flags_and_params(lock, link, dirty);
if (link->waiting_flags) {
/* This fop needs to wait until all its flags are cleared which
@@ -1403,6 +1471,7 @@ ec_get_size_version(ec_lock_link_t *link)
GF_ASSERT(!changed_flags);
}
+unlock:
UNLOCK(&lock->loc.inode->lock);
if (!changed_flags)
@@ -1814,6 +1883,10 @@ ec_lock_acquired(ec_lock_link_t *link)
LOCK(&lock->loc.inode->lock);
lock->acquired = _gf_true;
+ if (lock->contention) {
+ lock->release = _gf_true;
+ lock->contention = _gf_false;
+ }
ec_lock_update_fd(lock, fop);
ec_lock_wake_shared(lock, &list);
@@ -1824,7 +1897,8 @@ ec_lock_acquired(ec_lock_link_t *link)
if (fop->use_fd &&
(link->update[EC_DATA_TXN] || link->update[EC_METADATA_TXN])) {
- ec_fix_open(fop);
+ /* Try to reopen closed fd's only if lock has succeeded. */
+ ec_fix_open(fop, lock->mask);
}
ec_lock_resume_shared(&list);
@@ -1838,15 +1912,20 @@ ec_locked(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
ec_lock_link_t *link = NULL;
ec_lock_t *lock = NULL;
+ link = fop->data;
+ lock = link->lock;
if (op_ret >= 0) {
- link = fop->data;
- lock = link->lock;
lock->mask = lock->good_mask = fop->good;
lock->healing = 0;
ec_lock_acquired(link);
ec_lock(fop->parent);
} else {
+ LOCK(&lock->loc.inode->lock);
+ {
+ lock->contention = _gf_false;
+ }
+ UNLOCK(&lock->loc.inode->lock);
gf_msg(this->name, GF_LOG_WARNING, op_errno, EC_MSG_PREOP_LOCK_FAILED,
"Failed to complete preop lock");
}
@@ -2177,7 +2256,7 @@ ec_unlocked(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
if (op_ret < 0) {
gf_msg(this->name, GF_LOG_WARNING, op_errno, EC_MSG_UNLOCK_FAILED,
- "entry/inode unlocking failed (%s)", ec_fop_name(link->fop->id));
+ "entry/inode unlocking failed :(%s)", ec_msg_str(link->fop));
} else {
ec_trace("UNLOCKED", link->fop, "lock=%p", link->lock);
}
@@ -2214,6 +2293,23 @@ ec_unlock_lock(ec_lock_link_t *link)
}
}
+void
+ec_inode_bad_inc(inode_t *inode, xlator_t *xl)
+{
+ ec_inode_t *ctx = NULL;
+
+ LOCK(&inode->lock);
+ {
+ ctx = __ec_inode_get(inode, xl);
+ if (ctx == NULL) {
+ goto unlock;
+ }
+ ctx->bad_version++;
+ }
+unlock:
+ UNLOCK(&inode->lock);
+}
+
int32_t
ec_update_size_version_done(call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, dict_t *xattr,
@@ -2229,6 +2325,12 @@ ec_update_size_version_done(call_frame_t *frame, void *cookie, xlator_t *this,
ctx = lock->ctx;
if (op_ret < 0) {
+ if (link->lock->fd == NULL) {
+ ec_inode_bad_inc(link->lock->loc.inode, this);
+ } else {
+ ec_inode_bad_inc(link->lock->fd->inode, this);
+ }
+
gf_msg(fop->xl->name, fop_log_level(fop->id, op_errno), op_errno,
EC_MSG_SIZE_VERS_UPDATE_FAIL,
"Failed to update version and size. %s", ec_msg_str(fop));
@@ -2371,37 +2473,47 @@ ec_update_info(ec_lock_link_t *link)
uint64_t dirty[2] = {0, 0};
uint64_t size;
ec_t *ec = NULL;
+ uintptr_t mask;
lock = link->lock;
ctx = lock->ctx;
ec = link->fop->xl->private;
/* pre_version[*] will be 0 if have_version is false */
- version[0] = ctx->post_version[0] - ctx->pre_version[0];
- version[1] = ctx->post_version[1] - ctx->pre_version[1];
+ version[EC_DATA_TXN] = ctx->post_version[EC_DATA_TXN] -
+ ctx->pre_version[EC_DATA_TXN];
+ version[EC_METADATA_TXN] = ctx->post_version[EC_METADATA_TXN] -
+ ctx->pre_version[EC_METADATA_TXN];
size = ctx->post_size - ctx->pre_size;
/* If we set the dirty flag for update fop, we have to unset it.
* If fop has failed on some bricks, leave the dirty as marked. */
+
if (lock->unlock_now) {
+ if (version[EC_DATA_TXN]) {
+ /*A data fop will have difference in post and pre version
+ *and for data fop we send writes on healing bricks also */
+ mask = lock->good_mask | lock->healing;
+ } else {
+ mask = lock->good_mask;
+ }
/* Ensure that nodes are up while doing final
* metadata update.*/
- if (!(ec->node_mask & ~lock->good_mask) &&
- !(ec->node_mask & ~ec->xl_up)) {
- if (ctx->dirty[0] != 0) {
- dirty[0] = -1;
+ if (!(ec->node_mask & ~(mask)) && !(ec->node_mask & ~ec->xl_up)) {
+ if (ctx->dirty[EC_DATA_TXN] != 0) {
+ dirty[EC_DATA_TXN] = -1;
}
- if (ctx->dirty[1] != 0) {
- dirty[1] = -1;
+ if (ctx->dirty[EC_METADATA_TXN] != 0) {
+ dirty[EC_METADATA_TXN] = -1;
}
/*If everything is fine and we already
*have version xattr set on entry, there
*is no need to update version again*/
- if (ctx->pre_version[0]) {
- version[0] = 0;
+ if (ctx->pre_version[EC_DATA_TXN]) {
+ version[EC_DATA_TXN] = 0;
}
- if (ctx->pre_version[1]) {
- version[1] = 0;
+ if (ctx->pre_version[EC_METADATA_TXN]) {
+ version[EC_METADATA_TXN] = 0;
}
} else {
link->optimistic_changelog = _gf_false;
@@ -2410,8 +2522,8 @@ ec_update_info(ec_lock_link_t *link)
memset(ctx->dirty, 0, sizeof(ctx->dirty));
}
- if ((version[0] != 0) || (version[1] != 0) || (dirty[0] != 0) ||
- (dirty[1] != 0)) {
+ if ((version[EC_DATA_TXN] != 0) || (version[EC_METADATA_TXN] != 0) ||
+ (dirty[EC_DATA_TXN] != 0) || (dirty[EC_METADATA_TXN] != 0)) {
ec_update_size_version(link, version, size, dirty);
return _gf_true;
}
@@ -2453,13 +2565,20 @@ ec_lock_release(ec_t *ec, inode_t *inode)
goto done;
}
lock = ctx->inode_lock;
- if ((lock == NULL) || !lock->acquired || lock->release) {
+ if ((lock == NULL) || lock->release) {
goto done;
}
gf_msg_debug(ec->xl->name, 0, "Releasing inode %p due to lock contention",
inode);
+ if (!lock->acquired) {
+ /* This happens if some bricks already got the lock while inodelk is in
+ * progress. Set release to true after lock is acquired*/
+ lock->contention = _gf_true;
+ goto done;
+ }
+
/* The lock is not marked to be released, so the frozen list should be
* empty. */
GF_ASSERT(list_empty(&lock->frozen));
@@ -2911,3 +3030,13 @@ ec_manager(ec_fop_data_t *fop, int32_t error)
__ec_manager(fop, error);
}
+
+gf_boolean_t
+__ec_is_last_fop(ec_t *ec)
+{
+ if ((list_empty(&ec->pending_fops)) &&
+ (GF_ATOMIC_GET(ec->async_fop_count) == 0)) {
+ return _gf_true;
+ }
+ return _gf_false;
+}
diff --git a/xlators/cluster/ec/src/ec-common.h b/xlators/cluster/ec/src/ec-common.h
index bf41c0086f8..51493612ac6 100644
--- a/xlators/cluster/ec/src/ec-common.h
+++ b/xlators/cluster/ec/src/ec-common.h
@@ -11,8 +11,7 @@
#ifndef __EC_COMMON_H__
#define __EC_COMMON_H__
-#include "xlator.h"
-
+#include "glusterfs/compat-errno.h" // for ENODATA on BSD
#include "ec-data.h"
typedef enum { EC_DATA_TXN, EC_METADATA_TXN } ec_txn_t;
@@ -26,6 +25,30 @@ typedef enum { EC_DATA_TXN, EC_METADATA_TXN } ec_txn_t;
#define EC_FLAG_LOCK_SHARED 0x0001
+#define QUORUM_CBK(fn, fop, frame, cookie, this, op_ret, op_errno, params...) \
+ do { \
+ ec_t *__ec = fop->xl->private; \
+ int32_t __op_ret = 0; \
+ int32_t __op_errno = 0; \
+ int32_t __success_count = gf_bits_count(fop->good); \
+ \
+ __op_ret = op_ret; \
+ __op_errno = op_errno; \
+ if (!fop->parent && frame && \
+ (GF_CLIENT_PID_SELF_HEALD != frame->root->pid) && \
+ __ec->quorum_count && (__success_count < __ec->quorum_count) && \
+ op_ret >= 0) { \
+ __op_ret = -1; \
+ __op_errno = EIO; \
+ gf_msg(__ec->xl->name, GF_LOG_ERROR, 0, \
+ EC_MSG_CHILDS_INSUFFICIENT, \
+ "Insufficient available children for this request " \
+ "(have %d, need %d). %s", \
+ __success_count, __ec->quorum_count, ec_msg_str(fop)); \
+ } \
+ fn(frame, cookie, this, __op_ret, __op_errno, params); \
+ } while (0)
+
enum _ec_xattrop_flags {
EC_FLAG_XATTROP,
EC_FLAG_DATA_DIRTY,
@@ -54,9 +77,12 @@ enum _ec_xattrop_flags {
#define EC_SELFHEAL_BIT 62
-#define EC_MINIMUM_ONE -1
-#define EC_MINIMUM_MIN -2
-#define EC_MINIMUM_ALL -3
+#define EC_MINIMUM_ONE (1 << 6)
+#define EC_MINIMUM_MIN (2 << 6)
+#define EC_MINIMUM_ALL (3 << 6)
+#define EC_FOP_NO_PROPAGATE_ERROR (1 << 8)
+#define EC_FOP_MINIMUM(_flags) ((_flags)&255)
+#define EC_FOP_FLAGS(_flags) ((_flags) & ~255)
#define EC_UPDATE_DATA 1
#define EC_UPDATE_META 2
@@ -163,11 +189,14 @@ void
ec_dispatch_one(ec_fop_data_t *fop);
void
+ec_succeed_all(ec_fop_data_t *fop);
+
+void
ec_sleep(ec_fop_data_t *fop);
void
ec_resume(ec_fop_data_t *fop, int32_t error);
void
-ec_resume_parent(ec_fop_data_t *fop, int32_t error);
+ec_resume_parent(ec_fop_data_t *fop);
void
ec_manager(ec_fop_data_t *fop, int32_t error);
@@ -190,4 +219,16 @@ ec_lock_unlocked(call_frame_t *frame, void *cookie, xlator_t *this,
void
ec_update_fd_status(fd_t *fd, xlator_t *xl, int child_index,
int32_t ret_status);
+gf_boolean_t
+ec_is_entry_healing(ec_fop_data_t *fop);
+void
+ec_set_entry_healing(ec_fop_data_t *fop);
+void
+ec_reset_entry_healing(ec_fop_data_t *fop);
+char *
+ec_msg_str(ec_fop_data_t *fop);
+gf_boolean_t
+__ec_is_last_fop(ec_t *ec);
+void
+ec_lock_update_good(ec_lock_t *lock, ec_fop_data_t *fop);
#endif /* __EC_COMMON_H__ */
diff --git a/xlators/cluster/ec/src/ec-data.c b/xlators/cluster/ec/src/ec-data.c
index fae8843a679..06388833546 100644
--- a/xlators/cluster/ec/src/ec-data.c
+++ b/xlators/cluster/ec/src/ec-data.c
@@ -8,7 +8,6 @@
cases as published by the Free Software Foundation.
*/
-#include "ec-mem-types.h"
#include "ec-helpers.h"
#include "ec-common.h"
#include "ec-data.h"
@@ -98,7 +97,7 @@ ec_cbk_data_destroy(ec_cbk_data_t *cbk)
ec_fop_data_t *
ec_fop_data_allocate(call_frame_t *frame, xlator_t *this, int32_t id,
- uint32_t flags, uintptr_t target, int32_t minimum,
+ uint32_t flags, uintptr_t target, uint32_t fop_flags,
ec_wind_f wind, ec_handler_f handler, ec_cbk_t cbks,
void *data)
{
@@ -151,7 +150,8 @@ ec_fop_data_allocate(call_frame_t *frame, xlator_t *this, int32_t id,
fop->refs = 1;
fop->flags = flags;
- fop->minimum = minimum;
+ fop->minimum = EC_FOP_MINIMUM(fop_flags);
+ fop->fop_flags = EC_FOP_FLAGS(fop_flags);
fop->mask = target;
fop->wind = wind;
@@ -201,11 +201,13 @@ ec_handle_last_pending_fop_completion(ec_fop_data_t *fop, gf_boolean_t *notify)
{
ec_t *ec = fop->xl->private;
+ *notify = _gf_false;
+
if (!list_empty(&fop->pending_list)) {
LOCK(&ec->lock);
{
list_del_init(&fop->pending_list);
- *notify = list_empty(&ec->pending_fops);
+ *notify = __ec_is_last_fop(ec);
}
UNLOCK(&ec->lock);
}
@@ -271,7 +273,7 @@ ec_fop_data_release(ec_fop_data_t *fop)
loc_wipe(&fop->loc[1]);
GF_FREE(fop->errstr);
- ec_resume_parent(fop, fop->error);
+ ec_resume_parent(fop);
ec_fop_cleanup(fop);
diff --git a/xlators/cluster/ec/src/ec-data.h b/xlators/cluster/ec/src/ec-data.h
index 112536d554c..c8a74ffe1ed 100644
--- a/xlators/cluster/ec/src/ec-data.h
+++ b/xlators/cluster/ec/src/ec-data.h
@@ -18,7 +18,7 @@ ec_cbk_data_allocate(call_frame_t *frame, xlator_t *this, ec_fop_data_t *fop,
int32_t id, int32_t idx, int32_t op_ret, int32_t op_errno);
ec_fop_data_t *
ec_fop_data_allocate(call_frame_t *frame, xlator_t *this, int32_t id,
- uint32_t flags, uintptr_t target, int32_t minimum,
+ uint32_t flags, uintptr_t target, uint32_t fop_flags,
ec_wind_f wind, ec_handler_f handler, ec_cbk_t cbks,
void *data);
void
diff --git a/xlators/cluster/ec/src/ec-dir-read.c b/xlators/cluster/ec/src/ec-dir-read.c
index ec4cefb1e78..f71dcfac293 100644
--- a/xlators/cluster/ec/src/ec-dir-read.c
+++ b/xlators/cluster/ec/src/ec-dir-read.c
@@ -8,15 +8,11 @@
cases as published by the Free Software Foundation.
*/
-#include "xlator.h"
-#include "defaults.h"
-
#include "ec.h"
#include "ec-messages.h"
#include "ec-helpers.h"
#include "ec-common.h"
#include "ec-combine.h"
-#include "ec-method.h"
#include "ec-fops.h"
/****************************************************************
@@ -127,13 +123,15 @@ ec_manager_opendir(ec_fop_data_t *fop, int32_t state)
return EC_STATE_REPORT;
}
- err = ec_loc_from_loc(fop->xl, &ctx->loc, &fop->loc[0]);
- if (err != 0) {
- UNLOCK(&fop->fd->lock);
+ if (!ctx->loc.inode) {
+ err = ec_loc_from_loc(fop->xl, &ctx->loc, &fop->loc[0]);
+ if (err != 0) {
+ UNLOCK(&fop->fd->lock);
- fop->error = -err;
+ fop->error = -err;
- return EC_STATE_REPORT;
+ return EC_STATE_REPORT;
+ }
}
UNLOCK(&fop->fd->lock);
@@ -219,7 +217,7 @@ ec_manager_opendir(ec_fop_data_t *fop, int32_t state)
void
ec_opendir(call_frame_t *frame, xlator_t *this, uintptr_t target,
- int32_t minimum, fop_opendir_cbk_t func, void *data, loc_t *loc,
+ uint32_t fop_flags, fop_opendir_cbk_t func, void *data, loc_t *loc,
fd_t *fd, dict_t *xdata)
{
ec_cbk_t callback = {.opendir = func};
@@ -233,7 +231,7 @@ ec_opendir(call_frame_t *frame, xlator_t *this, uintptr_t target,
GF_VALIDATE_OR_GOTO(this->name, this->private, out);
fop = ec_fop_data_allocate(frame, this, GF_FOP_OPENDIR, EC_FLAG_LOCK_SHARED,
- target, minimum, ec_wind_opendir,
+ target, fop_flags, ec_wind_opendir,
ec_manager_opendir, callback, data);
if (fop == NULL) {
goto out;
@@ -388,9 +386,16 @@ ec_manager_readdir(ec_fop_data_t *fop, int32_t state)
/* Return error if opendir has not been successfully called on
* any subvolume. */
ctx = ec_fd_get(fop->fd, fop->xl);
- if ((ctx == NULL) || (ctx->open == 0)) {
- fop->error = EINVAL;
+ if (ctx == NULL) {
+ fop->error = ENOMEM;
+ } else if (ctx->open == 0) {
+ fop->error = EBADFD;
+ }
+ if (fop->error) {
+ gf_msg(fop->xl->name, GF_LOG_ERROR, fop->error,
+ EC_MSG_INVALID_REQUEST, "EC is not winding readdir: %s",
+ ec_msg_str(fop));
return EC_STATE_REPORT;
}
@@ -515,7 +520,7 @@ ec_manager_readdir(ec_fop_data_t *fop, int32_t state)
void
ec_readdir(call_frame_t *frame, xlator_t *this, uintptr_t target,
- int32_t minimum, fop_readdir_cbk_t func, void *data, fd_t *fd,
+ uint32_t fop_flags, fop_readdir_cbk_t func, void *data, fd_t *fd,
size_t size, off_t offset, dict_t *xdata)
{
ec_cbk_t callback = {.readdir = func};
@@ -529,7 +534,7 @@ ec_readdir(call_frame_t *frame, xlator_t *this, uintptr_t target,
GF_VALIDATE_OR_GOTO(this->name, this->private, out);
fop = ec_fop_data_allocate(frame, this, GF_FOP_READDIR, EC_FLAG_LOCK_SHARED,
- target, minimum, ec_wind_readdir,
+ target, fop_flags, ec_wind_readdir,
ec_manager_readdir, callback, data);
if (fop == NULL) {
goto out;
@@ -585,7 +590,7 @@ ec_wind_readdirp(ec_t *ec, ec_fop_data_t *fop, int32_t idx)
void
ec_readdirp(call_frame_t *frame, xlator_t *this, uintptr_t target,
- int32_t minimum, fop_readdirp_cbk_t func, void *data, fd_t *fd,
+ uint32_t fop_flags, fop_readdirp_cbk_t func, void *data, fd_t *fd,
size_t size, off_t offset, dict_t *xdata)
{
ec_cbk_t callback = {.readdirp = func};
@@ -599,7 +604,7 @@ ec_readdirp(call_frame_t *frame, xlator_t *this, uintptr_t target,
GF_VALIDATE_OR_GOTO(this->name, this->private, out);
fop = ec_fop_data_allocate(
- frame, this, GF_FOP_READDIRP, EC_FLAG_LOCK_SHARED, target, minimum,
+ frame, this, GF_FOP_READDIRP, EC_FLAG_LOCK_SHARED, target, fop_flags,
ec_wind_readdirp, ec_manager_readdir, callback, data);
if (fop == NULL) {
goto out;
diff --git a/xlators/cluster/ec/src/ec-dir-write.c b/xlators/cluster/ec/src/ec-dir-write.c
index f5c38e80dd7..53d27d895c3 100644
--- a/xlators/cluster/ec/src/ec-dir-write.c
+++ b/xlators/cluster/ec/src/ec-dir-write.c
@@ -8,9 +8,6 @@
cases as published by the Free Software Foundation.
*/
-#include "xlator.h"
-#include "defaults.h"
-
#include "ec.h"
#include "ec-messages.h"
#include "ec-helpers.h"
@@ -218,10 +215,10 @@ ec_manager_create(ec_fop_data_t *fop, int32_t state)
GF_ASSERT(cbk != NULL);
if (fop->cbks.create != NULL) {
- fop->cbks.create(fop->req_frame, fop, fop->xl, cbk->op_ret,
- cbk->op_errno, fop->fd, fop->loc[0].inode,
- &cbk->iatt[0], &cbk->iatt[1], &cbk->iatt[2],
- cbk->xdata);
+ QUORUM_CBK(fop->cbks.create, fop, fop->req_frame, fop, fop->xl,
+ cbk->op_ret, cbk->op_errno, fop->fd,
+ fop->loc[0].inode, &cbk->iatt[0], &cbk->iatt[1],
+ &cbk->iatt[2], cbk->xdata);
}
return EC_STATE_LOCK_REUSE;
@@ -262,7 +259,7 @@ ec_manager_create(ec_fop_data_t *fop, int32_t state)
void
ec_create(call_frame_t *frame, xlator_t *this, uintptr_t target,
- int32_t minimum, fop_create_cbk_t func, void *data, loc_t *loc,
+ uint32_t fop_flags, fop_create_cbk_t func, void *data, loc_t *loc,
int32_t flags, mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata)
{
ec_cbk_t callback = {.create = func};
@@ -275,7 +272,7 @@ ec_create(call_frame_t *frame, xlator_t *this, uintptr_t target,
GF_VALIDATE_OR_GOTO(this->name, frame, out);
GF_VALIDATE_OR_GOTO(this->name, this->private, out);
- fop = ec_fop_data_allocate(frame, this, GF_FOP_CREATE, 0, target, minimum,
+ fop = ec_fop_data_allocate(frame, this, GF_FOP_CREATE, 0, target, fop_flags,
ec_wind_create, ec_manager_create, callback,
data);
if (fop == NULL) {
@@ -390,9 +387,10 @@ ec_manager_link(ec_fop_data_t *fop, int32_t state)
GF_ASSERT(cbk != NULL);
if (fop->cbks.link != NULL) {
- fop->cbks.link(fop->req_frame, fop, fop->xl, cbk->op_ret,
- cbk->op_errno, fop->loc[0].inode, &cbk->iatt[0],
- &cbk->iatt[1], &cbk->iatt[2], cbk->xdata);
+ QUORUM_CBK(fop->cbks.link, fop, fop->req_frame, fop, fop->xl,
+ cbk->op_ret, cbk->op_errno, fop->loc[0].inode,
+ &cbk->iatt[0], &cbk->iatt[1], &cbk->iatt[2],
+ cbk->xdata);
}
return EC_STATE_LOCK_REUSE;
@@ -432,9 +430,9 @@ ec_manager_link(ec_fop_data_t *fop, int32_t state)
}
void
-ec_link(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum,
- fop_link_cbk_t func, void *data, loc_t *oldloc, loc_t *newloc,
- dict_t *xdata)
+ec_link(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_link_cbk_t func, void *data, loc_t *oldloc,
+ loc_t *newloc, dict_t *xdata)
{
ec_cbk_t callback = {.link = func};
ec_fop_data_t *fop = NULL;
@@ -446,7 +444,7 @@ ec_link(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum,
GF_VALIDATE_OR_GOTO(this->name, frame, out);
GF_VALIDATE_OR_GOTO(this->name, this->private, out);
- fop = ec_fop_data_allocate(frame, this, GF_FOP_LINK, 0, target, minimum,
+ fop = ec_fop_data_allocate(frame, this, GF_FOP_LINK, 0, target, fop_flags,
ec_wind_link, ec_manager_link, callback, data);
if (fop == NULL) {
goto out;
@@ -569,9 +567,10 @@ ec_manager_mkdir(ec_fop_data_t *fop, int32_t state)
GF_ASSERT(cbk != NULL);
if (fop->cbks.mkdir != NULL) {
- fop->cbks.mkdir(fop->req_frame, fop, fop->xl, cbk->op_ret,
- cbk->op_errno, fop->loc[0].inode, &cbk->iatt[0],
- &cbk->iatt[1], &cbk->iatt[2], cbk->xdata);
+ QUORUM_CBK(fop->cbks.mkdir, fop, fop->req_frame, fop, fop->xl,
+ cbk->op_ret, cbk->op_errno, fop->loc[0].inode,
+ &cbk->iatt[0], &cbk->iatt[1], &cbk->iatt[2],
+ cbk->xdata);
}
return EC_STATE_LOCK_REUSE;
@@ -613,9 +612,9 @@ ec_manager_mkdir(ec_fop_data_t *fop, int32_t state)
}
void
-ec_mkdir(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum,
- fop_mkdir_cbk_t func, void *data, loc_t *loc, mode_t mode,
- mode_t umask, dict_t *xdata)
+ec_mkdir(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_mkdir_cbk_t func, void *data, loc_t *loc,
+ mode_t mode, mode_t umask, dict_t *xdata)
{
ec_cbk_t callback = {.mkdir = func};
ec_fop_data_t *fop = NULL;
@@ -627,7 +626,7 @@ ec_mkdir(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum,
GF_VALIDATE_OR_GOTO(this->name, frame, out);
GF_VALIDATE_OR_GOTO(this->name, this->private, out);
- fop = ec_fop_data_allocate(frame, this, GF_FOP_MKDIR, 0, target, minimum,
+ fop = ec_fop_data_allocate(frame, this, GF_FOP_MKDIR, 0, target, fop_flags,
ec_wind_mkdir, ec_manager_mkdir, callback, data);
if (fop == NULL) {
goto out;
@@ -773,9 +772,10 @@ ec_manager_mknod(ec_fop_data_t *fop, int32_t state)
GF_ASSERT(cbk != NULL);
if (fop->cbks.mknod != NULL) {
- fop->cbks.mknod(fop->req_frame, fop, fop->xl, cbk->op_ret,
- cbk->op_errno, fop->loc[0].inode, &cbk->iatt[0],
- &cbk->iatt[1], &cbk->iatt[2], cbk->xdata);
+ QUORUM_CBK(fop->cbks.mknod, fop, fop->req_frame, fop, fop->xl,
+ cbk->op_ret, cbk->op_errno, fop->loc[0].inode,
+ &cbk->iatt[0], &cbk->iatt[1], &cbk->iatt[2],
+ cbk->xdata);
}
return EC_STATE_LOCK_REUSE;
@@ -815,9 +815,9 @@ ec_manager_mknod(ec_fop_data_t *fop, int32_t state)
}
void
-ec_mknod(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum,
- fop_mknod_cbk_t func, void *data, loc_t *loc, mode_t mode, dev_t rdev,
- mode_t umask, dict_t *xdata)
+ec_mknod(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_mknod_cbk_t func, void *data, loc_t *loc,
+ mode_t mode, dev_t rdev, mode_t umask, dict_t *xdata)
{
ec_cbk_t callback = {.mknod = func};
ec_fop_data_t *fop = NULL;
@@ -829,7 +829,7 @@ ec_mknod(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum,
GF_VALIDATE_OR_GOTO(this->name, frame, out);
GF_VALIDATE_OR_GOTO(this->name, this->private, out);
- fop = ec_fop_data_allocate(frame, this, GF_FOP_MKNOD, 0, target, minimum,
+ fop = ec_fop_data_allocate(frame, this, GF_FOP_MKNOD, 0, target, fop_flags,
ec_wind_mknod, ec_manager_mknod, callback, data);
if (fop == NULL) {
goto out;
@@ -931,10 +931,10 @@ ec_manager_rename(ec_fop_data_t *fop, int32_t state)
GF_ASSERT(cbk != NULL);
if (fop->cbks.rename != NULL) {
- fop->cbks.rename(fop->req_frame, fop, fop->xl, cbk->op_ret,
- cbk->op_errno, &cbk->iatt[0], &cbk->iatt[1],
- &cbk->iatt[2], &cbk->iatt[3], &cbk->iatt[4],
- cbk->xdata);
+ QUORUM_CBK(fop->cbks.rename, fop, fop->req_frame, fop, fop->xl,
+ cbk->op_ret, cbk->op_errno, &cbk->iatt[0],
+ &cbk->iatt[1], &cbk->iatt[2], &cbk->iatt[3],
+ &cbk->iatt[4], cbk->xdata);
}
return EC_STATE_LOCK_REUSE;
@@ -975,7 +975,7 @@ ec_manager_rename(ec_fop_data_t *fop, int32_t state)
void
ec_rename(call_frame_t *frame, xlator_t *this, uintptr_t target,
- int32_t minimum, fop_rename_cbk_t func, void *data, loc_t *oldloc,
+ uint32_t fop_flags, fop_rename_cbk_t func, void *data, loc_t *oldloc,
loc_t *newloc, dict_t *xdata)
{
ec_cbk_t callback = {.rename = func};
@@ -988,7 +988,7 @@ ec_rename(call_frame_t *frame, xlator_t *this, uintptr_t target,
GF_VALIDATE_OR_GOTO(this->name, frame, out);
GF_VALIDATE_OR_GOTO(this->name, this->private, out);
- fop = ec_fop_data_allocate(frame, this, GF_FOP_RENAME, 0, target, minimum,
+ fop = ec_fop_data_allocate(frame, this, GF_FOP_RENAME, 0, target, fop_flags,
ec_wind_rename, ec_manager_rename, callback,
data);
if (fop == NULL) {
@@ -1083,9 +1083,9 @@ ec_manager_rmdir(ec_fop_data_t *fop, int32_t state)
GF_ASSERT(cbk != NULL);
if (fop->cbks.rmdir != NULL) {
- fop->cbks.rmdir(fop->req_frame, fop, fop->xl, cbk->op_ret,
- cbk->op_errno, &cbk->iatt[0], &cbk->iatt[1],
- cbk->xdata);
+ QUORUM_CBK(fop->cbks.rmdir, fop, fop->req_frame, fop, fop->xl,
+ cbk->op_ret, cbk->op_errno, &cbk->iatt[0],
+ &cbk->iatt[1], cbk->xdata);
}
return EC_STATE_LOCK_REUSE;
@@ -1125,9 +1125,9 @@ ec_manager_rmdir(ec_fop_data_t *fop, int32_t state)
}
void
-ec_rmdir(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum,
- fop_rmdir_cbk_t func, void *data, loc_t *loc, int xflags,
- dict_t *xdata)
+ec_rmdir(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_rmdir_cbk_t func, void *data, loc_t *loc,
+ int xflags, dict_t *xdata)
{
ec_cbk_t callback = {.rmdir = func};
ec_fop_data_t *fop = NULL;
@@ -1139,7 +1139,7 @@ ec_rmdir(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum,
GF_VALIDATE_OR_GOTO(this->name, frame, out);
GF_VALIDATE_OR_GOTO(this->name, this->private, out);
- fop = ec_fop_data_allocate(frame, this, GF_FOP_RMDIR, 0, target, minimum,
+ fop = ec_fop_data_allocate(frame, this, GF_FOP_RMDIR, 0, target, fop_flags,
ec_wind_rmdir, ec_manager_rmdir, callback, data);
if (fop == NULL) {
goto out;
@@ -1237,10 +1237,10 @@ ec_manager_symlink(ec_fop_data_t *fop, int32_t state)
GF_ASSERT(cbk != NULL);
if (fop->cbks.symlink != NULL) {
- fop->cbks.symlink(fop->req_frame, fop, fop->xl, cbk->op_ret,
- cbk->op_errno, fop->loc[0].inode,
- &cbk->iatt[0], &cbk->iatt[1], &cbk->iatt[2],
- cbk->xdata);
+ QUORUM_CBK(fop->cbks.symlink, fop, fop->req_frame, fop, fop->xl,
+ cbk->op_ret, cbk->op_errno, fop->loc[0].inode,
+ &cbk->iatt[0], &cbk->iatt[1], &cbk->iatt[2],
+ cbk->xdata);
}
return EC_STATE_LOCK_REUSE;
@@ -1281,7 +1281,7 @@ ec_manager_symlink(ec_fop_data_t *fop, int32_t state)
void
ec_symlink(call_frame_t *frame, xlator_t *this, uintptr_t target,
- int32_t minimum, fop_symlink_cbk_t func, void *data,
+ uint32_t fop_flags, fop_symlink_cbk_t func, void *data,
const char *linkname, loc_t *loc, mode_t umask, dict_t *xdata)
{
ec_cbk_t callback = {.symlink = func};
@@ -1294,9 +1294,9 @@ ec_symlink(call_frame_t *frame, xlator_t *this, uintptr_t target,
GF_VALIDATE_OR_GOTO(this->name, frame, out);
GF_VALIDATE_OR_GOTO(this->name, this->private, out);
- fop = ec_fop_data_allocate(frame, this, GF_FOP_SYMLINK, 0, target, minimum,
- ec_wind_symlink, ec_manager_symlink, callback,
- data);
+ fop = ec_fop_data_allocate(frame, this, GF_FOP_SYMLINK, 0, target,
+ fop_flags, ec_wind_symlink, ec_manager_symlink,
+ callback, data);
if (fop == NULL) {
goto out;
}
@@ -1392,9 +1392,9 @@ ec_manager_unlink(ec_fop_data_t *fop, int32_t state)
GF_ASSERT(cbk != NULL);
if (fop->cbks.unlink != NULL) {
- fop->cbks.unlink(fop->req_frame, fop, fop->xl, cbk->op_ret,
- cbk->op_errno, &cbk->iatt[0], &cbk->iatt[1],
- cbk->xdata);
+ QUORUM_CBK(fop->cbks.unlink, fop, fop->req_frame, fop, fop->xl,
+ cbk->op_ret, cbk->op_errno, &cbk->iatt[0],
+ &cbk->iatt[1], cbk->xdata);
}
return EC_STATE_LOCK_REUSE;
@@ -1435,7 +1435,7 @@ ec_manager_unlink(ec_fop_data_t *fop, int32_t state)
void
ec_unlink(call_frame_t *frame, xlator_t *this, uintptr_t target,
- int32_t minimum, fop_unlink_cbk_t func, void *data, loc_t *loc,
+ uint32_t fop_flags, fop_unlink_cbk_t func, void *data, loc_t *loc,
int xflags, dict_t *xdata)
{
ec_cbk_t callback = {.unlink = func};
@@ -1448,7 +1448,7 @@ ec_unlink(call_frame_t *frame, xlator_t *this, uintptr_t target,
GF_VALIDATE_OR_GOTO(this->name, frame, out);
GF_VALIDATE_OR_GOTO(this->name, this->private, out);
- fop = ec_fop_data_allocate(frame, this, GF_FOP_UNLINK, 0, target, minimum,
+ fop = ec_fop_data_allocate(frame, this, GF_FOP_UNLINK, 0, target, fop_flags,
ec_wind_unlink, ec_manager_unlink, callback,
data);
if (fop == NULL) {
diff --git a/xlators/cluster/ec/src/ec-fops.h b/xlators/cluster/ec/src/ec-fops.h
index 2858d829c73..07edf8a7fec 100644
--- a/xlators/cluster/ec/src/ec-fops.h
+++ b/xlators/cluster/ec/src/ec-fops.h
@@ -11,240 +11,244 @@
#ifndef __EC_FOPS_H__
#define __EC_FOPS_H__
-#include "xlator.h"
+#include <glusterfs/xlator.h>
#include "ec-types.h"
#include "ec-common.h"
void
ec_access(call_frame_t *frame, xlator_t *this, uintptr_t target,
- int32_t minimum, fop_access_cbk_t func, void *data, loc_t *loc,
+ uint32_t fop_flags, fop_access_cbk_t func, void *data, loc_t *loc,
int32_t mask, dict_t *xdata);
void
ec_create(call_frame_t *frame, xlator_t *this, uintptr_t target,
- int32_t minimum, fop_create_cbk_t func, void *data, loc_t *loc,
+ uint32_t fop_flags, fop_create_cbk_t func, void *data, loc_t *loc,
int32_t flags, mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata);
void
ec_entrylk(call_frame_t *frame, xlator_t *this, uintptr_t target,
- int32_t minimum, fop_entrylk_cbk_t func, void *data,
+ uint32_t fop_flags, fop_entrylk_cbk_t func, void *data,
const char *volume, loc_t *loc, const char *basename,
entrylk_cmd cmd, entrylk_type type, dict_t *xdata);
void
ec_fentrylk(call_frame_t *frame, xlator_t *this, uintptr_t target,
- int32_t minimum, fop_fentrylk_cbk_t func, void *data,
+ uint32_t fop_flags, fop_fentrylk_cbk_t func, void *data,
const char *volume, fd_t *fd, const char *basename, entrylk_cmd cmd,
entrylk_type type, dict_t *xdata);
void
-ec_flush(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum,
- fop_flush_cbk_t func, void *data, fd_t *fd, dict_t *xdata);
+ec_flush(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_flush_cbk_t func, void *data, fd_t *fd,
+ dict_t *xdata);
void
-ec_fsync(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum,
- fop_fsync_cbk_t func, void *data, fd_t *fd, int32_t datasync,
- dict_t *xdata);
+ec_fsync(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_fsync_cbk_t func, void *data, fd_t *fd,
+ int32_t datasync, dict_t *xdata);
void
ec_fsyncdir(call_frame_t *frame, xlator_t *this, uintptr_t target,
- int32_t minimum, fop_fsyncdir_cbk_t func, void *data, fd_t *fd,
+ uint32_t fop_flags, fop_fsyncdir_cbk_t func, void *data, fd_t *fd,
int32_t datasync, dict_t *xdata);
void
ec_getxattr(call_frame_t *frame, xlator_t *this, uintptr_t target,
- int32_t minimum, fop_getxattr_cbk_t func, void *data, loc_t *loc,
+ uint32_t fop_flags, fop_getxattr_cbk_t func, void *data, loc_t *loc,
const char *name, dict_t *xdata);
void
ec_fgetxattr(call_frame_t *frame, xlator_t *this, uintptr_t target,
- int32_t minimum, fop_fgetxattr_cbk_t func, void *data, fd_t *fd,
+ uint32_t fop_flags, fop_fgetxattr_cbk_t func, void *data, fd_t *fd,
const char *name, dict_t *xdata);
void
-ec_heal(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum,
- fop_heal_cbk_t func, void *data, loc_t *loc, int32_t partial,
- dict_t *xdata);
+ec_heal(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_heal_cbk_t func, void *data, loc_t *loc,
+ int32_t partial, dict_t *xdata);
void
-ec_fheal(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum,
- fop_fheal_cbk_t func, void *data, fd_t *fd, int32_t partial,
- dict_t *xdata);
+ec_fheal(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_fheal_cbk_t func, void *data, fd_t *fd,
+ int32_t partial, dict_t *xdata);
void
ec_inodelk(call_frame_t *frame, xlator_t *this, gf_lkowner_t *owner,
- uintptr_t target, int32_t minimum, fop_inodelk_cbk_t func,
+ uintptr_t target, uint32_t fop_flags, fop_inodelk_cbk_t func,
void *data, const char *volume, loc_t *loc, int32_t cmd,
struct gf_flock *flock, dict_t *xdata);
void
ec_finodelk(call_frame_t *frame, xlator_t *this, gf_lkowner_t *owner,
- uintptr_t target, int32_t minimum, fop_finodelk_cbk_t func,
+ uintptr_t target, uint32_t fop_flags, fop_finodelk_cbk_t func,
void *data, const char *volume, fd_t *fd, int32_t cmd,
struct gf_flock *flock, dict_t *xdata);
void
-ec_link(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum,
- fop_link_cbk_t func, void *data, loc_t *oldloc, loc_t *newloc,
- dict_t *xdata);
+ec_link(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_link_cbk_t func, void *data, loc_t *oldloc,
+ loc_t *newloc, dict_t *xdata);
void
-ec_lk(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum,
+ec_lk(call_frame_t *frame, xlator_t *this, uintptr_t target, uint32_t fop_flags,
fop_lk_cbk_t func, void *data, fd_t *fd, int32_t cmd,
struct gf_flock *flock, dict_t *xdata);
void
ec_lookup(call_frame_t *frame, xlator_t *this, uintptr_t target,
- int32_t minimum, fop_lookup_cbk_t func, void *data, loc_t *loc,
+ uint32_t fop_flags, fop_lookup_cbk_t func, void *data, loc_t *loc,
dict_t *xdata);
void
-ec_mkdir(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum,
- fop_mkdir_cbk_t func, void *data, loc_t *loc, mode_t mode,
- mode_t umask, dict_t *xdata);
+ec_mkdir(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_mkdir_cbk_t func, void *data, loc_t *loc,
+ mode_t mode, mode_t umask, dict_t *xdata);
void
-ec_mknod(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum,
- fop_mknod_cbk_t func, void *data, loc_t *loc, mode_t mode, dev_t rdev,
- mode_t umask, dict_t *xdata);
+ec_mknod(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_mknod_cbk_t func, void *data, loc_t *loc,
+ mode_t mode, dev_t rdev, mode_t umask, dict_t *xdata);
void
-ec_open(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum,
- fop_open_cbk_t func, void *data, loc_t *loc, int32_t flags, fd_t *fd,
- dict_t *xdata);
+ec_open(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_open_cbk_t func, void *data, loc_t *loc,
+ int32_t flags, fd_t *fd, dict_t *xdata);
void
ec_opendir(call_frame_t *frame, xlator_t *this, uintptr_t target,
- int32_t minimum, fop_opendir_cbk_t func, void *data, loc_t *loc,
+ uint32_t fop_flags, fop_opendir_cbk_t func, void *data, loc_t *loc,
fd_t *fd, dict_t *xdata);
void
ec_readdir(call_frame_t *frame, xlator_t *this, uintptr_t target,
- int32_t minimum, fop_readdir_cbk_t func, void *data, fd_t *fd,
+ uint32_t fop_flags, fop_readdir_cbk_t func, void *data, fd_t *fd,
size_t size, off_t offset, dict_t *xdata);
void
ec_readdirp(call_frame_t *frame, xlator_t *this, uintptr_t target,
- int32_t minimum, fop_readdirp_cbk_t func, void *data, fd_t *fd,
+ uint32_t fop_flags, fop_readdirp_cbk_t func, void *data, fd_t *fd,
size_t size, off_t offset, dict_t *xdata);
void
ec_readlink(call_frame_t *frame, xlator_t *this, uintptr_t target,
- int32_t minimum, fop_readlink_cbk_t func, void *data, loc_t *loc,
+ uint32_t fop_flags, fop_readlink_cbk_t func, void *data, loc_t *loc,
size_t size, dict_t *xdata);
void
-ec_readv(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum,
- fop_readv_cbk_t func, void *data, fd_t *fd, size_t size, off_t offset,
- uint32_t flags, dict_t *xdata);
+ec_readv(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_readv_cbk_t func, void *data, fd_t *fd,
+ size_t size, off_t offset, uint32_t flags, dict_t *xdata);
void
ec_removexattr(call_frame_t *frame, xlator_t *this, uintptr_t target,
- int32_t minimum, fop_removexattr_cbk_t func, void *data,
+ uint32_t fop_flags, fop_removexattr_cbk_t func, void *data,
loc_t *loc, const char *name, dict_t *xdata);
void
ec_fremovexattr(call_frame_t *frame, xlator_t *this, uintptr_t target,
- int32_t minimum, fop_fremovexattr_cbk_t func, void *data,
+ uint32_t fop_flags, fop_fremovexattr_cbk_t func, void *data,
fd_t *fd, const char *name, dict_t *xdata);
void
ec_rename(call_frame_t *frame, xlator_t *this, uintptr_t target,
- int32_t minimum, fop_rename_cbk_t func, void *data, loc_t *oldloc,
+ uint32_t fop_flags, fop_rename_cbk_t func, void *data, loc_t *oldloc,
loc_t *newloc, dict_t *xdata);
void
-ec_rmdir(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum,
- fop_rmdir_cbk_t func, void *data, loc_t *loc, int xflags,
- dict_t *xdata);
+ec_rmdir(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_rmdir_cbk_t func, void *data, loc_t *loc,
+ int xflags, dict_t *xdata);
void
ec_setattr(call_frame_t *frame, xlator_t *this, uintptr_t target,
- int32_t minimum, fop_setattr_cbk_t func, void *data, loc_t *loc,
+ uint32_t fop_flags, fop_setattr_cbk_t func, void *data, loc_t *loc,
struct iatt *stbuf, int32_t valid, dict_t *xdata);
void
ec_fsetattr(call_frame_t *frame, xlator_t *this, uintptr_t target,
- int32_t minimum, fop_fsetattr_cbk_t func, void *data, fd_t *fd,
+ uint32_t fop_flags, fop_fsetattr_cbk_t func, void *data, fd_t *fd,
struct iatt *stbuf, int32_t valid, dict_t *xdata);
void
ec_setxattr(call_frame_t *frame, xlator_t *this, uintptr_t target,
- int32_t minimum, fop_setxattr_cbk_t func, void *data, loc_t *loc,
+ uint32_t fop_flags, fop_setxattr_cbk_t func, void *data, loc_t *loc,
dict_t *dict, int32_t flags, dict_t *xdata);
void
ec_fsetxattr(call_frame_t *frame, xlator_t *this, uintptr_t target,
- int32_t minimum, fop_fsetxattr_cbk_t func, void *data, fd_t *fd,
+ uint32_t fop_flags, fop_fsetxattr_cbk_t func, void *data, fd_t *fd,
dict_t *dict, int32_t flags, dict_t *xdata);
void
-ec_stat(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum,
- fop_stat_cbk_t func, void *data, loc_t *loc, dict_t *xdata);
+ec_stat(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_stat_cbk_t func, void *data, loc_t *loc,
+ dict_t *xdata);
void
-ec_fstat(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum,
- fop_fstat_cbk_t func, void *data, fd_t *fd, dict_t *xdata);
+ec_fstat(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_fstat_cbk_t func, void *data, fd_t *fd,
+ dict_t *xdata);
void
ec_statfs(call_frame_t *frame, xlator_t *this, uintptr_t target,
- int32_t minimum, fop_statfs_cbk_t func, void *data, loc_t *loc,
+ uint32_t fop_flags, fop_statfs_cbk_t func, void *data, loc_t *loc,
dict_t *xdata);
void
ec_symlink(call_frame_t *frame, xlator_t *this, uintptr_t target,
- int32_t minimum, fop_symlink_cbk_t func, void *data,
+ uint32_t fop_flags, fop_symlink_cbk_t func, void *data,
const char *linkname, loc_t *loc, mode_t umask, dict_t *xdata);
void
ec_fallocate(call_frame_t *frame, xlator_t *this, uintptr_t target,
- int32_t minimum, fop_fallocate_cbk_t func, void *data, fd_t *fd,
+ uint32_t fop_flags, fop_fallocate_cbk_t func, void *data, fd_t *fd,
int32_t mode, off_t offset, size_t len, dict_t *xdata);
void
ec_discard(call_frame_t *frame, xlator_t *this, uintptr_t target,
- int32_t minimum, fop_discard_cbk_t func, void *data, fd_t *fd,
+ uint32_t fop_flags, fop_discard_cbk_t func, void *data, fd_t *fd,
off_t offset, size_t len, dict_t *xdata);
void
ec_truncate(call_frame_t *frame, xlator_t *this, uintptr_t target,
- int32_t minimum, fop_truncate_cbk_t func, void *data, loc_t *loc,
+ uint32_t fop_flags, fop_truncate_cbk_t func, void *data, loc_t *loc,
off_t offset, dict_t *xdata);
void
ec_ftruncate(call_frame_t *frame, xlator_t *this, uintptr_t target,
- int32_t minimum, fop_ftruncate_cbk_t func, void *data, fd_t *fd,
+ uint32_t fop_flags, fop_ftruncate_cbk_t func, void *data, fd_t *fd,
off_t offset, dict_t *xdata);
void
ec_unlink(call_frame_t *frame, xlator_t *this, uintptr_t target,
- int32_t minimum, fop_unlink_cbk_t func, void *data, loc_t *loc,
+ uint32_t fop_flags, fop_unlink_cbk_t func, void *data, loc_t *loc,
int xflags, dict_t *xdata);
void
ec_writev(call_frame_t *frame, xlator_t *this, uintptr_t target,
- int32_t minimum, fop_writev_cbk_t func, void *data, fd_t *fd,
+ uint32_t fop_flags, fop_writev_cbk_t func, void *data, fd_t *fd,
struct iovec *vector, int32_t count, off_t offset, uint32_t flags,
struct iobref *iobref, dict_t *xdata);
void
ec_xattrop(call_frame_t *frame, xlator_t *this, uintptr_t target,
- int32_t minimum, fop_xattrop_cbk_t func, void *data, loc_t *loc,
+ uint32_t fop_flags, fop_xattrop_cbk_t func, void *data, loc_t *loc,
gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata);
void
ec_fxattrop(call_frame_t *frame, xlator_t *this, uintptr_t target,
- int32_t minimum, fop_fxattrop_cbk_t func, void *data, fd_t *fd,
+ uint32_t fop_flags, fop_fxattrop_cbk_t func, void *data, fd_t *fd,
gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata);
void
-ec_seek(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum,
- fop_seek_cbk_t func, void *data, fd_t *fd, off_t offset,
- gf_seek_what_t what, dict_t *xdata);
+ec_seek(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_seek_cbk_t func, void *data, fd_t *fd,
+ off_t offset, gf_seek_what_t what, dict_t *xdata);
void
-ec_ipc(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum,
- fop_ipc_cbk_t func, void *data, int32_t op, dict_t *xdata);
+ec_ipc(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_ipc_cbk_t func, void *data, int32_t op,
+ dict_t *xdata);
#endif /* __EC_FOPS_H__ */
diff --git a/xlators/cluster/ec/src/ec-galois.c b/xlators/cluster/ec/src/ec-galois.c
index 8cb4dc2e4e3..6e4990c71f5 100644
--- a/xlators/cluster/ec/src/ec-galois.c
+++ b/xlators/cluster/ec/src/ec-galois.c
@@ -10,9 +10,6 @@
#include <string.h>
-#include "mem-pool.h"
-#include "list.h"
-
#include "ec-mem-types.h"
#include "ec-gf8.h"
#include "ec-helpers.h"
diff --git a/xlators/cluster/ec/src/ec-generic.c b/xlators/cluster/ec/src/ec-generic.c
index 2fd10526494..884deb93669 100644
--- a/xlators/cluster/ec/src/ec-generic.c
+++ b/xlators/cluster/ec/src/ec-generic.c
@@ -8,16 +8,13 @@
cases as published by the Free Software Foundation.
*/
-#include "xlator.h"
-#include "defaults.h"
-#include "byte-order.h"
+#include <glusterfs/byte-order.h>
#include "ec.h"
#include "ec-messages.h"
#include "ec-helpers.h"
#include "ec-common.h"
#include "ec-combine.h"
-#include "ec-method.h"
#include "ec-fops.h"
/* FOP: flush */
@@ -150,9 +147,41 @@ ec_manager_flush(ec_fop_data_t *fop, int32_t state)
}
}
+static int32_t
+ec_validate_fd(fd_t *fd, xlator_t *xl)
+{
+ uint64_t iversion = 0;
+ uint64_t fversion = 0;
+ ec_inode_t *inode_ctx = NULL;
+ ec_fd_t *fd_ctx = NULL;
+
+ LOCK(&fd->lock);
+ {
+ fd_ctx = __ec_fd_get(fd, xl);
+ if (fd_ctx) {
+ fversion = fd_ctx->bad_version;
+ }
+ }
+ UNLOCK(&fd->lock);
+
+ LOCK(&fd->inode->lock);
+ {
+ inode_ctx = __ec_inode_get(fd->inode, xl);
+ if (inode_ctx) {
+ iversion = inode_ctx->bad_version;
+ }
+ }
+ UNLOCK(&fd->inode->lock);
+ if (fversion < iversion) {
+ return EBADF;
+ }
+ return 0;
+}
+
void
-ec_flush(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum,
- fop_flush_cbk_t func, void *data, fd_t *fd, dict_t *xdata)
+ec_flush(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_flush_cbk_t func, void *data, fd_t *fd,
+ dict_t *xdata)
{
ec_cbk_t callback = {.flush = func};
ec_fop_data_t *fop = NULL;
@@ -164,7 +193,17 @@ ec_flush(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum,
GF_VALIDATE_OR_GOTO(this->name, frame, out);
GF_VALIDATE_OR_GOTO(this->name, this->private, out);
- fop = ec_fop_data_allocate(frame, this, GF_FOP_FLUSH, 0, target, minimum,
+ if (fd) {
+ error = ec_validate_fd(fd, this);
+ if (error) {
+ gf_msg(this->name, GF_LOG_ERROR, EBADF, EC_MSG_FD_BAD,
+ "Failing %s on %s", gf_fop_list[GF_FOP_FLUSH],
+ fd->inode ? uuid_utoa(fd->inode->gfid) : "");
+ goto out;
+ }
+ }
+
+ fop = ec_fop_data_allocate(frame, this, GF_FOP_FLUSH, 0, target, fop_flags,
ec_wind_flush, ec_manager_flush, callback, data);
if (fop == NULL) {
goto out;
@@ -366,9 +405,9 @@ ec_manager_fsync(ec_fop_data_t *fop, int32_t state)
}
void
-ec_fsync(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum,
- fop_fsync_cbk_t func, void *data, fd_t *fd, int32_t datasync,
- dict_t *xdata)
+ec_fsync(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_fsync_cbk_t func, void *data, fd_t *fd,
+ int32_t datasync, dict_t *xdata)
{
ec_cbk_t callback = {.fsync = func};
ec_fop_data_t *fop = NULL;
@@ -380,7 +419,17 @@ ec_fsync(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum,
GF_VALIDATE_OR_GOTO(this->name, frame, out);
GF_VALIDATE_OR_GOTO(this->name, this->private, out);
- fop = ec_fop_data_allocate(frame, this, GF_FOP_FSYNC, 0, target, minimum,
+ if (fd) {
+ error = ec_validate_fd(fd, this);
+ if (error) {
+ gf_msg(this->name, GF_LOG_ERROR, EBADF, EC_MSG_FD_BAD,
+ "Failing %s on %s", gf_fop_list[GF_FOP_FSYNC],
+ fd->inode ? uuid_utoa(fd->inode->gfid) : "");
+ goto out;
+ }
+ }
+
+ fop = ec_fop_data_allocate(frame, this, GF_FOP_FSYNC, 0, target, fop_flags,
ec_wind_fsync, ec_manager_fsync, callback, data);
if (fop == NULL) {
goto out;
@@ -553,7 +602,7 @@ ec_manager_fsyncdir(ec_fop_data_t *fop, int32_t state)
void
ec_fsyncdir(call_frame_t *frame, xlator_t *this, uintptr_t target,
- int32_t minimum, fop_fsyncdir_cbk_t func, void *data, fd_t *fd,
+ uint32_t fop_flags, fop_fsyncdir_cbk_t func, void *data, fd_t *fd,
int32_t datasync, dict_t *xdata)
{
ec_cbk_t callback = {.fsyncdir = func};
@@ -566,9 +615,9 @@ ec_fsyncdir(call_frame_t *frame, xlator_t *this, uintptr_t target,
GF_VALIDATE_OR_GOTO(this->name, frame, out);
GF_VALIDATE_OR_GOTO(this->name, this->private, out);
- fop = ec_fop_data_allocate(frame, this, GF_FOP_FSYNCDIR, 0, target, minimum,
- ec_wind_fsyncdir, ec_manager_fsyncdir, callback,
- data);
+ fop = ec_fop_data_allocate(frame, this, GF_FOP_FSYNCDIR, 0, target,
+ fop_flags, ec_wind_fsyncdir, ec_manager_fsyncdir,
+ callback, data);
if (fop == NULL) {
goto out;
}
@@ -848,7 +897,7 @@ ec_manager_lookup(ec_fop_data_t *fop, int32_t state)
void
ec_lookup(call_frame_t *frame, xlator_t *this, uintptr_t target,
- int32_t minimum, fop_lookup_cbk_t func, void *data, loc_t *loc,
+ uint32_t fop_flags, fop_lookup_cbk_t func, void *data, loc_t *loc,
dict_t *xdata)
{
ec_cbk_t callback = {.lookup = func};
@@ -862,7 +911,7 @@ ec_lookup(call_frame_t *frame, xlator_t *this, uintptr_t target,
GF_VALIDATE_OR_GOTO(this->name, this->private, out);
fop = ec_fop_data_allocate(frame, this, GF_FOP_LOOKUP, EC_FLAG_LOCK_SHARED,
- target, minimum, ec_wind_lookup,
+ target, fop_flags, ec_wind_lookup,
ec_manager_lookup, callback, data);
if (fop == NULL) {
goto out;
@@ -1033,7 +1082,7 @@ ec_manager_statfs(ec_fop_data_t *fop, int32_t state)
void
ec_statfs(call_frame_t *frame, xlator_t *this, uintptr_t target,
- int32_t minimum, fop_statfs_cbk_t func, void *data, loc_t *loc,
+ uint32_t fop_flags, fop_statfs_cbk_t func, void *data, loc_t *loc,
dict_t *xdata)
{
ec_cbk_t callback = {.statfs = func};
@@ -1047,7 +1096,7 @@ ec_statfs(call_frame_t *frame, xlator_t *this, uintptr_t target,
GF_VALIDATE_OR_GOTO(this->name, this->private, out);
fop = ec_fop_data_allocate(frame, this, GF_FOP_STATFS, EC_FLAG_LOCK_SHARED,
- target, minimum, ec_wind_statfs,
+ target, fop_flags, ec_wind_statfs,
ec_manager_statfs, callback, data);
if (fop == NULL) {
goto out;
@@ -1270,7 +1319,7 @@ ec_manager_xattrop(ec_fop_data_t *fop, int32_t state)
void
ec_xattrop(call_frame_t *frame, xlator_t *this, uintptr_t target,
- int32_t minimum, fop_xattrop_cbk_t func, void *data, loc_t *loc,
+ uint32_t fop_flags, fop_xattrop_cbk_t func, void *data, loc_t *loc,
gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata)
{
ec_cbk_t callback = {.xattrop = func};
@@ -1283,9 +1332,9 @@ ec_xattrop(call_frame_t *frame, xlator_t *this, uintptr_t target,
GF_VALIDATE_OR_GOTO(this->name, frame, out);
GF_VALIDATE_OR_GOTO(this->name, this->private, out);
- fop = ec_fop_data_allocate(frame, this, GF_FOP_XATTROP, 0, target, minimum,
- ec_wind_xattrop, ec_manager_xattrop, callback,
- data);
+ fop = ec_fop_data_allocate(frame, this, GF_FOP_XATTROP, 0, target,
+ fop_flags, ec_wind_xattrop, ec_manager_xattrop,
+ callback, data);
if (fop == NULL) {
goto out;
}
@@ -1343,7 +1392,7 @@ ec_wind_fxattrop(ec_t *ec, ec_fop_data_t *fop, int32_t idx)
void
ec_fxattrop(call_frame_t *frame, xlator_t *this, uintptr_t target,
- int32_t minimum, fop_fxattrop_cbk_t func, void *data, fd_t *fd,
+ uint32_t fop_flags, fop_fxattrop_cbk_t func, void *data, fd_t *fd,
gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata)
{
ec_cbk_t callback = {.fxattrop = func};
@@ -1356,9 +1405,9 @@ ec_fxattrop(call_frame_t *frame, xlator_t *this, uintptr_t target,
GF_VALIDATE_OR_GOTO(this->name, frame, out);
GF_VALIDATE_OR_GOTO(this->name, this->private, out);
- fop = ec_fop_data_allocate(frame, this, GF_FOP_FXATTROP, 0, target, minimum,
- ec_wind_fxattrop, ec_manager_xattrop, callback,
- data);
+ fop = ec_fop_data_allocate(frame, this, GF_FOP_FXATTROP, 0, target,
+ fop_flags, ec_wind_fxattrop, ec_manager_xattrop,
+ callback, data);
if (fop == NULL) {
goto out;
}
@@ -1507,8 +1556,9 @@ ec_manager_ipc(ec_fop_data_t *fop, int32_t state)
}
void
-ec_ipc(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum,
- fop_ipc_cbk_t func, void *data, int32_t op, dict_t *xdata)
+ec_ipc(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_ipc_cbk_t func, void *data, int32_t op,
+ dict_t *xdata)
{
ec_cbk_t callback = {.ipc = func};
ec_fop_data_t *fop = NULL;
@@ -1520,7 +1570,7 @@ ec_ipc(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum,
GF_VALIDATE_OR_GOTO(this->name, frame, out);
GF_VALIDATE_OR_GOTO(this->name, this->private, out);
- fop = ec_fop_data_allocate(frame, this, GF_FOP_IPC, 0, target, minimum,
+ fop = ec_fop_data_allocate(frame, this, GF_FOP_IPC, 0, target, fop_flags,
ec_wind_ipc, ec_manager_ipc, callback, data);
if (fop == NULL) {
goto out;
diff --git a/xlators/cluster/ec/src/ec-heal.c b/xlators/cluster/ec/src/ec-heal.c
index 9eb5b856932..7d991f04aac 100644
--- a/xlators/cluster/ec/src/ec-heal.c
+++ b/xlators/cluster/ec/src/ec-heal.c
@@ -8,16 +8,14 @@
cases as published by the Free Software Foundation.
*/
-#include "xlator.h"
-#include "defaults.h"
-#include "compat-errno.h"
-#include "byte-order.h"
-#include "syncop.h"
-#include "syncop-utils.h"
-#include "cluster-syncop.h"
+#include <glusterfs/defaults.h>
+#include <glusterfs/compat-errno.h>
+#include <glusterfs/byte-order.h>
+#include <glusterfs/syncop.h>
+#include <glusterfs/syncop-utils.h>
+#include <glusterfs/cluster-syncop.h>
#include "ec.h"
-#include "ec-mem-types.h"
#include "ec-types.h"
#include "ec-messages.h"
#include "ec-helpers.h"
@@ -72,6 +70,7 @@ struct ec_name_data {
char *name;
inode_t *parent;
default_args_cbk_t *replies;
+ uint32_t heal_pending;
};
static char *ec_ignore_xattrs[] = {GF_SELINUX_XATTR_KEY, QUOTA_SIZE_KEY, NULL};
@@ -103,6 +102,48 @@ ec_sh_key_match(dict_t *dict, char *key, data_t *val, void *mdata)
}
/* FOP: heal */
+void
+ec_set_entry_healing(ec_fop_data_t *fop)
+{
+ ec_inode_t *ctx = NULL;
+ loc_t *loc = NULL;
+
+ if (!fop)
+ return;
+
+ loc = &fop->loc[0];
+ LOCK(&loc->inode->lock);
+ {
+ ctx = __ec_inode_get(loc->inode, fop->xl);
+ if (ctx) {
+ ctx->heal_count += 1;
+ }
+ }
+ UNLOCK(&loc->inode->lock);
+}
+
+void
+ec_reset_entry_healing(ec_fop_data_t *fop)
+{
+ ec_inode_t *ctx = NULL;
+ loc_t *loc = NULL;
+ int32_t heal_count = 0;
+ if (!fop)
+ return;
+
+ loc = &fop->loc[0];
+ LOCK(&loc->inode->lock);
+ {
+ ctx = __ec_inode_get(loc->inode, fop->xl);
+ if (ctx) {
+ ctx->heal_count += -1;
+ heal_count = ctx->heal_count;
+ }
+ }
+ UNLOCK(&loc->inode->lock);
+ GF_ASSERT(heal_count >= 0);
+}
+
uintptr_t
ec_heal_check(ec_fop_data_t *fop, uintptr_t *pgood)
{
@@ -325,16 +366,16 @@ ec_heal_data_block(ec_heal_t *heal)
/* FOP: fheal */
void
-ec_fheal(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum,
- fop_fheal_cbk_t func, void *data, fd_t *fd, int32_t partial,
- dict_t *xdata)
+ec_fheal(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_fheal_cbk_t func, void *data, fd_t *fd,
+ int32_t partial, dict_t *xdata)
{
ec_fd_t *ctx = ec_fd_get(fd, this);
if (ctx != NULL) {
gf_msg_trace("ec", 0, "FHEAL ctx: flags=%X, open=%" PRIXPTR, ctx->flags,
ctx->open);
- ec_heal(frame, this, target, minimum, func, data, &ctx->loc, partial,
+ ec_heal(frame, this, target, fop_flags, func, data, &ctx->loc, partial,
xdata);
}
}
@@ -954,6 +995,7 @@ ec_set_new_entry_dirty(ec_t *ec, loc_t *loc, struct iatt *ia,
ret = -ENOTCONN;
goto out;
}
+
out:
if (xattr)
dict_unref(xattr);
@@ -977,6 +1019,7 @@ ec_delete_stale_name(dict_t *gfid_db, char *key, data_t *d, void *data)
int estale_count = 0;
int i = 0;
call_frame_t *frame = name_data->frame;
+ uuid_t gfid;
ec = name_data->frame->this->private;
EC_REPLIES_ALLOC(replies, ec->nodes);
@@ -985,12 +1028,16 @@ ec_delete_stale_name(dict_t *gfid_db, char *key, data_t *d, void *data)
goto out;
}
+ loc.parent = inode_ref(name_data->parent);
loc.inode = inode_new(name_data->parent->table);
if (!loc.inode) {
ret = -ENOMEM;
goto out;
}
- gf_uuid_parse(key, loc.gfid);
+
+ gf_uuid_parse(key, gfid);
+ gf_uuid_copy(loc.pargfid, name_data->parent->gfid);
+ loc.name = name_data->name;
output = alloca0(ec->nodes);
ret = cluster_lookup(ec->xl_list, name_data->participants, ec->nodes,
replies, output, name_data->frame, ec->xl, &loc, NULL);
@@ -1003,6 +1050,11 @@ ec_delete_stale_name(dict_t *gfid_db, char *key, data_t *d, void *data)
estale_count++;
else
name_data->participants[i] = 0;
+ } else if (gf_uuid_compare(gfid, replies[i].stat.ia_gfid)) {
+ estale_count++;
+ gf_msg_debug(ec->xl->name, 0, "%s/%s: different gfid as %s",
+ uuid_utoa(name_data->parent->gfid), name_data->name,
+ key);
}
}
@@ -1122,6 +1174,7 @@ ec_create_name(call_frame_t *frame, ec_t *ec, inode_t *parent, char *name,
dict_t *xdata = NULL;
char *linkname = NULL;
ec_config_t config;
+
/* There should be just one gfid key */
EC_REPLIES_ALLOC(replies, ec->nodes);
if (gfid_db->count != 1) {
@@ -1366,6 +1419,11 @@ __ec_heal_name(call_frame_t *frame, ec_t *ec, inode_t *parent, char *name,
ret = ec_create_name(frame, ec, parent, name, replies, gfid_db, enoent,
participants);
+ if (ret >= 0) {
+ /* If ec_create_name() succeeded we return 1 to indicate that a new
+ * file has been created and it will need to be healed. */
+ ret = 1;
+ }
out:
cluster_replies_wipe(replies, ec->nodes);
loc_wipe(&loc);
@@ -1443,18 +1501,22 @@ ec_name_heal_handler(xlator_t *subvol, gf_dirent_t *entry, loc_t *parent,
ret = ec_heal_name(name_data->frame, ec, parent->inode, entry->d_name,
name_on);
- if (ret < 0)
+ if (ret < 0) {
memset(name_on, 0, ec->nodes);
+ } else {
+ name_data->heal_pending += ret;
+ }
for (i = 0; i < ec->nodes; i++)
if (name_data->participants[i] && !name_on[i])
name_data->failed_on[i] = 1;
+
return 0;
}
int
ec_heal_names(call_frame_t *frame, ec_t *ec, inode_t *inode,
- unsigned char *participants)
+ unsigned char *participants, uint32_t *pending)
{
int i = 0;
int j = 0;
@@ -1467,7 +1529,7 @@ ec_heal_names(call_frame_t *frame, ec_t *ec, inode_t *inode,
name_data.frame = frame;
name_data.participants = participants;
name_data.failed_on = alloca0(ec->nodes);
- ;
+ name_data.heal_pending = 0;
for (i = 0; i < ec->nodes; i++) {
if (!participants[i])
@@ -1486,6 +1548,8 @@ ec_heal_names(call_frame_t *frame, ec_t *ec, inode_t *inode,
break;
}
}
+ *pending += name_data.heal_pending;
+
loc_wipe(&loc);
return ret;
}
@@ -1493,7 +1557,7 @@ ec_heal_names(call_frame_t *frame, ec_t *ec, inode_t *inode,
int
__ec_heal_entry(call_frame_t *frame, ec_t *ec, inode_t *inode,
unsigned char *heal_on, unsigned char *sources,
- unsigned char *healed_sinks)
+ unsigned char *healed_sinks, uint32_t *pending)
{
unsigned char *locked_on = NULL;
unsigned char *output = NULL;
@@ -1538,7 +1602,7 @@ unlock:
if (sources[i] || healed_sinks[i])
participants[i] = 1;
}
- ret = ec_heal_names(frame, ec, inode, participants);
+ ret = ec_heal_names(frame, ec, inode, participants, pending);
if (EC_COUNT(participants, ec->nodes) <= ec->fragments)
goto out;
@@ -1559,7 +1623,8 @@ out:
int
ec_heal_entry(call_frame_t *frame, ec_t *ec, inode_t *inode,
- unsigned char *sources, unsigned char *healed_sinks)
+ unsigned char *sources, unsigned char *healed_sinks,
+ uint32_t *pending)
{
unsigned char *locked_on = NULL;
unsigned char *up_subvols = NULL;
@@ -1590,7 +1655,7 @@ ec_heal_entry(call_frame_t *frame, ec_t *ec, inode_t *inode,
goto unlock;
}
ret = __ec_heal_entry(frame, ec, inode, locked_on, sources,
- healed_sinks);
+ healed_sinks, pending);
}
unlock:
cluster_uninodelk(ec->xl_list, locked_on, ec->nodes, replies, output, frame,
@@ -1909,16 +1974,16 @@ ec_manager_heal_block(ec_fop_data_t *fop, int32_t state)
case EC_STATE_REPORT:
if (fop->cbks.heal) {
- fop->cbks.heal(fop->req_frame, fop, fop->xl, 0, 0,
+ fop->cbks.heal(fop->req_frame, fop->data, fop->xl, 0, 0,
(heal->good | heal->bad), heal->good, heal->bad,
- NULL);
+ 0, NULL);
}
return EC_STATE_END;
case -EC_STATE_REPORT:
if (fop->cbks.heal) {
- fop->cbks.heal(fop->req_frame, fop, fop->xl, -1, fop->error, 0,
- 0, 0, NULL);
+ fop->cbks.heal(fop->req_frame, fop->data, fop->xl, -1,
+ fop->error, 0, 0, 0, 0, NULL);
}
return EC_STATE_END;
@@ -1933,7 +1998,7 @@ ec_manager_heal_block(ec_fop_data_t *fop, int32_t state)
/*Takes lock */
void
ec_heal_block(call_frame_t *frame, xlator_t *this, uintptr_t target,
- int32_t minimum, fop_heal_cbk_t func, ec_heal_t *heal)
+ uint32_t fop_flags, fop_heal_cbk_t func, ec_heal_t *heal)
{
ec_cbk_t callback = {.heal = func};
ec_fop_data_t *fop = NULL;
@@ -1944,7 +2009,7 @@ ec_heal_block(call_frame_t *frame, xlator_t *this, uintptr_t target,
VALIDATE_OR_GOTO(this, out);
GF_VALIDATE_OR_GOTO(this->name, this->private, out);
- fop = ec_fop_data_allocate(frame, this, EC_FOP_HEAL, 0, target, minimum,
+ fop = ec_fop_data_allocate(frame, this, EC_FOP_HEAL, 0, target, fop_flags,
NULL, ec_manager_heal_block, callback, heal);
if (fop == NULL)
goto out;
@@ -1955,19 +2020,21 @@ out:
if (fop != NULL) {
ec_manager(fop, error);
} else {
- func(frame, NULL, this, -1, error, 0, 0, 0, NULL);
+ func(frame, heal, this, -1, error, 0, 0, 0, 0, NULL);
}
}
int32_t
ec_heal_block_done(call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, uintptr_t mask,
- uintptr_t good, uintptr_t bad, dict_t *xdata)
+ uintptr_t good, uintptr_t bad, uint32_t pending,
+ dict_t *xdata)
{
- ec_fop_data_t *fop = cookie;
- ec_heal_t *heal = fop->data;
+ ec_heal_t *heal = cookie;
- fop->heal = NULL;
+ if (heal->fop) {
+ heal->fop->heal = NULL;
+ }
heal->fop = NULL;
heal->error = op_ret < 0 ? op_errno : 0;
syncbarrier_wake(heal->data);
@@ -2259,9 +2326,10 @@ ec_restore_time_and_adjust_versions(call_frame_t *frame, ec_t *ec, fd_t *fd,
loc.inode = inode_ref(fd->inode);
gf_uuid_copy(loc.gfid, fd->inode->gfid);
- ret = cluster_setattr(ec->xl_list, healed_sinks, ec->nodes, replies,
- output, frame, ec->xl, &loc, &source_buf,
- GF_SET_ATTR_ATIME | GF_SET_ATTR_MTIME, NULL);
+ ret = cluster_setattr(
+ ec->xl_list, healed_sinks, ec->nodes, replies, output, frame,
+ ec->xl, &loc, &source_buf,
+ GF_SET_ATTR_ATIME | GF_SET_ATTR_MTIME | GF_SET_ATTR_CTIME, NULL);
EC_INTERSECT(healed_sinks, healed_sinks, output, ec->nodes);
if (EC_COUNT(healed_sinks, ec->nodes) == 0) {
ret = -ENOTCONN;
@@ -2429,6 +2497,58 @@ out:
return ret;
}
+int
+ec_heal_purge_stale_index(call_frame_t *frame, ec_t *ec, inode_t *inode)
+{
+ int i = 0;
+ int ret = 0;
+ dict_t **xattr = NULL;
+ loc_t loc = {0};
+ uint64_t dirty_xattr[EC_VERSION_SIZE] = {0};
+ unsigned char *on = NULL;
+ default_args_cbk_t *replies = NULL;
+ dict_t *dict = NULL;
+
+ /* Allocate the required memory */
+ loc.inode = inode_ref(inode);
+ gf_uuid_copy(loc.gfid, inode->gfid);
+ on = alloca0(ec->nodes);
+ EC_REPLIES_ALLOC(replies, ec->nodes);
+ xattr = GF_CALLOC(ec->nodes, sizeof(*xattr), gf_common_mt_pointer);
+ if (!xattr) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ dict = dict_new();
+ if (!dict) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ for (i = 0; i < ec->nodes; i++) {
+ xattr[i] = dict;
+ on[i] = 1;
+ }
+ ret = dict_set_static_bin(dict, EC_XATTR_DIRTY, dirty_xattr,
+ (sizeof(*dirty_xattr) * EC_VERSION_SIZE));
+ if (ret < 0) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ PARALLEL_FOP_ONLIST(ec->xl_list, on, ec->nodes, replies, frame,
+ ec_wind_xattrop_parallel, &loc, GF_XATTROP_ADD_ARRAY64,
+ xattr, NULL);
+out:
+ if (dict) {
+ dict_unref(dict);
+ }
+ if (xattr) {
+ GF_FREE(xattr);
+ }
+ cluster_replies_wipe(replies, ec->nodes);
+ loc_wipe(&loc);
+ return ret;
+}
+
void
ec_heal_do(xlator_t *this, void *data, loc_t *loc, int32_t partial)
{
@@ -2446,6 +2566,7 @@ ec_heal_do(xlator_t *this, void *data, loc_t *loc, int32_t partial)
intptr_t mbad = 0;
intptr_t good = 0;
intptr_t bad = 0;
+ uint32_t pending = 0;
ec_fop_data_t *fop = data;
gf_boolean_t blocking = _gf_false;
ec_heal_need_t need_heal = EC_HEAL_NONEED;
@@ -2481,7 +2602,7 @@ ec_heal_do(xlator_t *this, void *data, loc_t *loc, int32_t partial)
if (loc->name && strlen(loc->name)) {
ret = ec_heal_name(frame, ec, loc->parent, (char *)loc->name,
participants);
- if (ret == 0) {
+ if (ret >= 0) {
gf_msg_debug(this->name, 0,
"%s: name heal "
"successful on %" PRIXPTR,
@@ -2499,32 +2620,34 @@ ec_heal_do(xlator_t *this, void *data, loc_t *loc, int32_t partial)
/* Mount triggers heal only when it detects that it must need heal, shd
* triggers heals periodically which need not be thorough*/
- ec_heal_inspect(frame, ec, loc->inode, up_subvols, _gf_false,
- !ec->shd.iamshd, &need_heal);
-
- if (need_heal == EC_HEAL_NONEED) {
- gf_msg(ec->xl->name, GF_LOG_DEBUG, 0, EC_MSG_HEAL_FAIL,
- "Heal is not required for : %s ", uuid_utoa(loc->gfid));
- goto out;
+ if (ec->shd.iamshd && (ret <= 0)) {
+ ec_heal_inspect(frame, ec, loc->inode, up_subvols, _gf_false, _gf_false,
+ &need_heal);
+
+ if (need_heal == EC_HEAL_PURGE_INDEX) {
+ gf_msg(ec->xl->name, GF_LOG_INFO, 0, EC_MSG_HEAL_FAIL,
+ "Index entry needs to be purged for: %s ",
+ uuid_utoa(loc->gfid));
+ /* We need to send zero-xattrop so that stale index entry could be
+ * removed. We need not take lock on this entry to do so as
+ * xattrop on a brick is atomic. */
+ ec_heal_purge_stale_index(frame, ec, loc->inode);
+ goto out;
+ } else if (need_heal == EC_HEAL_NONEED) {
+ gf_msg(ec->xl->name, GF_LOG_DEBUG, 0, EC_MSG_HEAL_FAIL,
+ "Heal is not required for : %s ", uuid_utoa(loc->gfid));
+ goto out;
+ }
}
- msources = alloca0(ec->nodes);
- mhealed_sinks = alloca0(ec->nodes);
- ret = ec_heal_metadata(frame, ec, loc->inode, msources, mhealed_sinks);
- if (ret == 0) {
- mgood = ec_char_array_to_mask(msources, ec->nodes);
- mbad = ec_char_array_to_mask(mhealed_sinks, ec->nodes);
- } else {
- op_ret = -1;
- op_errno = -ret;
- }
sources = alloca0(ec->nodes);
healed_sinks = alloca0(ec->nodes);
if (IA_ISREG(loc->inode->ia_type)) {
ret = ec_heal_data(frame, ec, blocking, loc->inode, sources,
healed_sinks);
} else if (IA_ISDIR(loc->inode->ia_type) && !partial) {
- ret = ec_heal_entry(frame, ec, loc->inode, sources, healed_sinks);
+ ret = ec_heal_entry(frame, ec, loc->inode, sources, healed_sinks,
+ &pending);
} else {
ret = 0;
memcpy(sources, participants, ec->nodes);
@@ -2538,15 +2661,27 @@ ec_heal_do(xlator_t *this, void *data, loc_t *loc, int32_t partial)
op_ret = -1;
op_errno = -ret;
}
+ msources = alloca0(ec->nodes);
+ mhealed_sinks = alloca0(ec->nodes);
+ ret = ec_heal_metadata(frame, ec, loc->inode, msources, mhealed_sinks);
+ if (ret == 0) {
+ mgood = ec_char_array_to_mask(msources, ec->nodes);
+ mbad = ec_char_array_to_mask(mhealed_sinks, ec->nodes);
+ } else {
+ op_ret = -1;
+ op_errno = -ret;
+ }
out:
+ ec_reset_entry_healing(fop);
if (fop->cbks.heal) {
- fop->cbks.heal(fop->req_frame, fop, fop->xl, op_ret, op_errno,
+ fop->cbks.heal(fop->req_frame, fop->data, fop->xl, op_ret, op_errno,
ec_char_array_to_mask(participants, ec->nodes),
- mgood & good, mbad & bad, NULL);
+ mgood & good, mbad & bad, pending, NULL);
}
if (frame)
STACK_DESTROY(frame->root);
+
return;
}
@@ -2593,8 +2728,8 @@ void
ec_heal_fail(ec_t *ec, ec_fop_data_t *fop)
{
if (fop->cbks.heal) {
- fop->cbks.heal(fop->req_frame, NULL, ec->xl, -1, fop->error, 0, 0, 0,
- NULL);
+ fop->cbks.heal(fop->req_frame, fop->data, ec->xl, -1, fop->error, 0, 0,
+ 0, 0, NULL);
}
ec_fop_data_release(fop);
}
@@ -2603,13 +2738,31 @@ void
ec_launch_heal(ec_t *ec, ec_fop_data_t *fop)
{
int ret = 0;
+ call_frame_t *frame = NULL;
+
+ frame = create_frame(ec->xl, ec->xl->ctx->pool);
+ if (!frame) {
+ ret = -1;
+ goto out;
+ }
+
+ ec_owner_set(frame, frame->root);
+ /*Do heal as root*/
+ frame->root->uid = 0;
+ frame->root->gid = 0;
+ /*Mark the fops as internal*/
+ frame->root->pid = GF_CLIENT_PID_SELF_HEALD;
ret = synctask_new(ec->xl->ctx->env, ec_synctask_heal_wrap, ec_heal_done,
- NULL, fop);
+ frame, fop);
+out:
if (ret < 0) {
ec_fop_set_error(fop, ENOMEM);
ec_heal_fail(ec, fop);
}
+
+ if (frame)
+ STACK_DESTROY(frame->root);
}
void
@@ -2650,11 +2803,33 @@ ec_handle_healers_done(ec_fop_data_t *fop)
ec_launch_heal(ec, heal_fop);
}
+gf_boolean_t
+ec_is_entry_healing(ec_fop_data_t *fop)
+{
+ ec_inode_t *ctx = NULL;
+ int32_t heal_count = 0;
+ loc_t *loc = NULL;
+
+ loc = &fop->loc[0];
+
+ LOCK(&loc->inode->lock);
+ {
+ ctx = __ec_inode_get(loc->inode, fop->xl);
+ if (ctx) {
+ heal_count = ctx->heal_count;
+ }
+ }
+ UNLOCK(&loc->inode->lock);
+ GF_ASSERT(heal_count >= 0);
+ return heal_count;
+}
+
void
ec_heal_throttle(xlator_t *this, ec_fop_data_t *fop)
{
gf_boolean_t can_heal = _gf_true;
ec_t *ec = this->private;
+ ec_fop_data_t *fop_rel = NULL;
if (fop->req_frame == NULL) {
LOCK(&ec->lock);
@@ -2662,8 +2837,13 @@ ec_heal_throttle(xlator_t *this, ec_fop_data_t *fop)
if ((ec->background_heals > 0) &&
(ec->heal_wait_qlen + ec->background_heals) >
(ec->heal_waiters + ec->healers)) {
- list_add_tail(&fop->healer, &ec->heal_waiting);
- ec->heal_waiters++;
+ if (!ec_is_entry_healing(fop)) {
+ list_add_tail(&fop->healer, &ec->heal_waiting);
+ ec->heal_waiters++;
+ ec_set_entry_healing(fop);
+ } else {
+ fop_rel = fop;
+ }
fop = __ec_dequeue_heals(ec);
} else {
can_heal = _gf_false;
@@ -2673,8 +2853,12 @@ ec_heal_throttle(xlator_t *this, ec_fop_data_t *fop)
}
if (can_heal) {
- if (fop)
+ if (fop) {
+ if (fop->req_frame != NULL) {
+ ec_set_entry_healing(fop);
+ }
ec_launch_heal(ec, fop);
+ }
} else {
gf_msg_debug(this->name, 0,
"Max number of heals are "
@@ -2682,12 +2866,15 @@ ec_heal_throttle(xlator_t *this, ec_fop_data_t *fop)
ec_fop_set_error(fop, EBUSY);
ec_heal_fail(ec, fop);
}
+ if (fop_rel) {
+ ec_heal_done(0, NULL, fop_rel);
+ }
}
void
-ec_heal(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum,
- fop_heal_cbk_t func, void *data, loc_t *loc, int32_t partial,
- dict_t *xdata)
+ec_heal(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_heal_cbk_t func, void *data, loc_t *loc,
+ int32_t partial, dict_t *xdata)
{
ec_cbk_t callback = {.heal = func};
ec_fop_data_t *fop = NULL;
@@ -2703,7 +2890,7 @@ ec_heal(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum,
if (frame && frame->local)
goto fail;
- fop = ec_fop_data_allocate(frame, this, EC_FOP_HEAL, 0, target, minimum,
+ fop = ec_fop_data_allocate(frame, this, EC_FOP_HEAL, 0, target, fop_flags,
NULL, NULL, callback, data);
err = ENOMEM;
@@ -2729,15 +2916,27 @@ fail:
if (fop)
ec_fop_data_release(fop);
if (func)
- func(frame, NULL, this, -1, err, 0, 0, 0, NULL);
+ func(frame, data, this, -1, err, 0, 0, 0, 0, NULL);
}
int
ec_replace_heal_done(int ret, call_frame_t *heal, void *opaque)
{
ec_t *ec = opaque;
+ gf_boolean_t last_fop = _gf_false;
+ if (GF_ATOMIC_DEC(ec->async_fop_count) == 0) {
+ LOCK(&ec->lock);
+ {
+ last_fop = __ec_is_last_fop(ec);
+ }
+ UNLOCK(&ec->lock);
+ }
gf_msg_debug(ec->xl->name, 0, "getxattr on bricks is done ret %d", ret);
+
+ if (last_fop)
+ ec_pending_fops_completed(ec);
+
return 0;
}
@@ -2777,6 +2976,10 @@ ec_replace_brick_heal_wrap(void *opaque)
itable = ec->xl->itable;
else
goto out;
+
+ if (xlator_is_cleanup_starting(ec->xl))
+ goto out;
+
ret = ec_replace_heal(ec, itable->root);
out:
return ret;
@@ -2787,14 +2990,15 @@ ec_launch_replace_heal(ec_t *ec)
{
int ret = -1;
- if (!ec)
- return ret;
ret = synctask_new(ec->xl->ctx->env, ec_replace_brick_heal_wrap,
ec_replace_heal_done, NULL, ec);
+
if (ret < 0) {
gf_msg_debug(ec->xl->name, 0, "Heal failed for replace brick ret = %d",
ret);
+ ec_replace_heal_done(-1, NULL, ec);
}
+
return ret;
}
@@ -2826,7 +3030,7 @@ out:
static int32_t
_need_heal_calculate(ec_t *ec, uint64_t *dirty, unsigned char *sources,
gf_boolean_t self_locked, int32_t lock_count,
- ec_heal_need_t *need_heal)
+ ec_heal_need_t *need_heal, uint64_t *versions)
{
int i = 0;
int source_count = 0;
@@ -2836,11 +3040,18 @@ _need_heal_calculate(ec_t *ec, uint64_t *dirty, unsigned char *sources,
*need_heal = EC_HEAL_NONEED;
if (self_locked || lock_count == 0) {
for (i = 0; i < ec->nodes; i++) {
- if (dirty[i]) {
+ if (dirty[i] || (versions[i] != versions[0])) {
*need_heal = EC_HEAL_MUST;
goto out;
}
}
+ /* If lock count is 0, all dirty flags are 0 and all the
+ * versions are matching then why are we here. It looks
+ * like something went wrong while removing the index entries
+ * after completing a successful heal or fop. In this case
+ * we need to remove this index entry to avoid triggering heal
+ * in a loop and causing lookups again and again*/
+ *need_heal = EC_HEAL_PURGE_INDEX;
} else {
for (i = 0; i < ec->nodes; i++) {
/* Since each lock can only increment the dirty
@@ -2852,6 +3063,9 @@ _need_heal_calculate(ec_t *ec, uint64_t *dirty, unsigned char *sources,
*need_heal = EC_HEAL_MUST;
goto out;
}
+ if (dirty[i] != dirty[0] || (versions[i] != versions[0])) {
+ *need_heal = EC_HEAL_MAYBE;
+ }
}
}
} else {
@@ -2872,7 +3086,6 @@ ec_need_metadata_heal(ec_t *ec, inode_t *inode, default_args_cbk_t *replies,
unsigned char *healed_sinks = NULL;
uint64_t *meta_versions = NULL;
int ret = 0;
- int i = 0;
sources = alloca0(ec->nodes);
healed_sinks = alloca0(ec->nodes);
@@ -2885,15 +3098,7 @@ ec_need_metadata_heal(ec_t *ec, inode_t *inode, default_args_cbk_t *replies,
}
ret = _need_heal_calculate(ec, dirty, sources, self_locked, lock_count,
- need_heal);
- if (ret == ec->nodes && *need_heal == EC_HEAL_NONEED) {
- for (i = 1; i < ec->nodes; i++) {
- if (meta_versions[i] != meta_versions[0]) {
- *need_heal = EC_HEAL_MUST;
- goto out;
- }
- }
- }
+ need_heal, meta_versions);
out:
return ret;
}
@@ -2929,7 +3134,7 @@ ec_need_data_heal(ec_t *ec, inode_t *inode, default_args_cbk_t *replies,
}
ret = _need_heal_calculate(ec, dirty, sources, self_locked, lock_count,
- need_heal);
+ need_heal, data_versions);
out:
return ret;
}
@@ -2957,7 +3162,7 @@ ec_need_entry_heal(ec_t *ec, inode_t *inode, default_args_cbk_t *replies,
}
ret = _need_heal_calculate(ec, dirty, sources, self_locked, lock_count,
- need_heal);
+ need_heal, data_versions);
out:
return ret;
}
@@ -3055,10 +3260,6 @@ ec_heal_inspect(call_frame_t *frame, ec_t *ec, inode_t *inode,
need_heal:
ret = ec_need_heal(ec, inode, replies, lock_count, self_locked, thorough,
need_heal);
-
- if (!self_locked && *need_heal == EC_HEAL_MUST) {
- *need_heal = EC_HEAL_MAYBE;
- }
out:
cluster_replies_wipe(replies, ec->nodes);
loc_wipe(&loc);
@@ -3144,7 +3345,7 @@ ec_get_heal_info(xlator_t *this, loc_t *entry_loc, dict_t **dict_rsp)
ret = ec_heal_inspect(frame, ec, loc.inode, up_subvols, _gf_false,
_gf_false, &need_heal);
- if (ret == ec->nodes && need_heal == EC_HEAL_NONEED) {
+ if (ret == ec->nodes && need_heal != EC_HEAL_MAYBE) {
goto set_heal;
}
need_heal = EC_HEAL_NONEED;
diff --git a/xlators/cluster/ec/src/ec-heald.c b/xlators/cluster/ec/src/ec-heald.c
index 130790c66ac..5c1586bc9c5 100644
--- a/xlators/cluster/ec/src/ec-heald.c
+++ b/xlators/cluster/ec/src/ec-heald.c
@@ -8,15 +8,14 @@
cases as published by the Free Software Foundation.
*/
-#include "xlator.h"
-#include "defaults.h"
-#include "compat-errno.h"
+#include <glusterfs/defaults.h>
+#include <glusterfs/compat-errno.h>
#include "ec.h"
#include "ec-messages.h"
#include "ec-heald.h"
#include "ec-mem-types.h"
-#include "syncop.h"
-#include "syncop-utils.h"
+#include <glusterfs/syncop.h>
+#include <glusterfs/syncop-utils.h>
#include "protocol-common.h"
#define NTH_INDEX_HEALER(this, n) \
@@ -63,7 +62,7 @@ __ec_shd_healer_wait(struct subvol_healer *healer)
ec = healer->this->private;
disabled_loop:
- wait_till.tv_sec = time(NULL) + 60;
+ wait_till.tv_sec = gf_time() + ec->shd.timeout;
while (!healer->rerun) {
ret = pthread_cond_timedwait(&healer->cond, &healer->mutex, &wait_till);
@@ -71,6 +70,11 @@ disabled_loop:
break;
}
+ if (ec->shutdown) {
+ healer->running = _gf_false;
+ return -1;
+ }
+
ret = healer->rerun;
healer->rerun = 0;
@@ -152,19 +156,78 @@ ec_shd_index_purge(xlator_t *subvol, inode_t *inode, char *name)
return ret;
}
+static gf_boolean_t
+ec_is_heal_completed(char *status)
+{
+ char *bad_pos = NULL;
+ char *zero_pos = NULL;
+
+ if (!status) {
+ return _gf_false;
+ }
+
+ /*Logic:
+ * Status will be of the form Good: <binary>, Bad: <binary>
+ * If heal completed, the first '0' found by strchr() must lie after
+ * the ':' of 'Bad:', i.e. after the last ':' found by strrchr().
+ * */
+
+ zero_pos = strchr(status, '0');
+ bad_pos = strrchr(status, ':');
+ if (!zero_pos || !bad_pos) {
+ /*malformed status*/
+ return _gf_false;
+ }
+
+ if (zero_pos > bad_pos) {
+ return _gf_true;
+ }
+
+ return _gf_false;
+}
+
int
ec_shd_selfheal(struct subvol_healer *healer, int child, loc_t *loc,
gf_boolean_t full)
{
+ dict_t *xdata = NULL;
+ dict_t *dict = NULL;
+ uint32_t count;
int32_t ret;
+ char *heal_status = NULL;
+ ec_t *ec = healer->this->private;
+
+ GF_ATOMIC_INC(ec->stats.shd.attempted);
+ ret = syncop_getxattr(healer->this, loc, &dict, EC_XATTR_HEAL, NULL,
+ &xdata);
+ if (ret == 0) {
+ if (dict && (dict_get_str(dict, EC_XATTR_HEAL, &heal_status) == 0)) {
+ if (ec_is_heal_completed(heal_status)) {
+ GF_ATOMIC_INC(ec->stats.shd.completed);
+ }
+ }
+ }
- ret = syncop_getxattr(healer->this, loc, NULL, EC_XATTR_HEAL, NULL, NULL);
- if (!full && (ret >= 0) && (loc->inode->ia_type == IA_IFDIR)) {
+ if (!full && (loc->inode->ia_type == IA_IFDIR)) {
/* If we have just healed a directory, it's possible that
- * other index entries have appeared to be healed. We put a
- * mark so that we can check it later and restart a scan
- * without delay. */
- healer->rerun = _gf_true;
+ * other index entries have appeared to be healed. */
+ if ((xdata != NULL) &&
+ (dict_get_uint32(xdata, EC_XATTR_HEAL_NEW, &count) == 0) &&
+ (count > 0)) {
+ /* Force a rerun of the index healer. */
+ gf_msg_debug(healer->this->name, 0, "%d more entries to heal",
+ count);
+
+ healer->rerun = _gf_true;
+ }
+ }
+
+ if (xdata != NULL) {
+ dict_unref(xdata);
+ }
+
+ if (dict) {
+ dict_unref(dict);
}
return ret;
@@ -241,9 +304,11 @@ ec_shd_index_sweep(struct subvol_healer *healer)
goto out;
}
+ _mask_cancellation();
ret = syncop_mt_dir_scan(NULL, subvol, &loc, GF_CLIENT_PID_SELF_HEALD,
healer, ec_shd_index_heal, xdata,
ec->shd.max_threads, ec->shd.wait_qlength);
+ _unmask_cancellation();
out:
if (xdata)
dict_unref(xdata);
@@ -263,6 +328,11 @@ ec_shd_full_heal(xlator_t *subvol, gf_dirent_t *entry, loc_t *parent,
int ret = 0;
ec = this->private;
+
+ if (this->cleanup_starting) {
+ return -ENOTCONN;
+ }
+
if (ec->xl_up_count <= ec->fragments) {
return -ENOTCONN;
}
@@ -305,11 +375,15 @@ ec_shd_full_sweep(struct subvol_healer *healer, inode_t *inode)
{
ec_t *ec = NULL;
loc_t loc = {0};
+ int ret = -1;
ec = healer->this->private;
loc.inode = inode;
- return syncop_ftw(ec->xl_list[healer->subvol], &loc,
- GF_CLIENT_PID_SELF_HEALD, healer, ec_shd_full_heal);
+ _mask_cancellation();
+ ret = syncop_ftw(ec->xl_list[healer->subvol], &loc,
+ GF_CLIENT_PID_SELF_HEALD, healer, ec_shd_full_heal);
+ _unmask_cancellation();
+ return ret;
}
void *
@@ -317,13 +391,16 @@ ec_shd_index_healer(void *data)
{
struct subvol_healer *healer = NULL;
xlator_t *this = NULL;
+ int run = 0;
healer = data;
THIS = this = healer->this;
ec_t *ec = this->private;
for (;;) {
- ec_shd_healer_wait(healer);
+ run = ec_shd_healer_wait(healer);
+ if (run == -1)
+ break;
if (ec->xl_up_count > ec->fragments) {
gf_msg_debug(this->name, 0, "starting index sweep on subvol %s",
@@ -352,16 +429,12 @@ ec_shd_full_healer(void *data)
rootloc.inode = this->itable->root;
for (;;) {
- pthread_mutex_lock(&healer->mutex);
- {
- run = __ec_shd_healer_wait(healer);
- if (!run)
- healer->running = _gf_false;
- }
- pthread_mutex_unlock(&healer->mutex);
-
- if (!run)
+ run = ec_shd_healer_wait(healer);
+ if (run < 0) {
break;
+ } else if (run == 0) {
+ continue;
+ }
if (ec->xl_up_count > ec->fragments) {
gf_msg(this->name, GF_LOG_INFO, 0, EC_MSG_FULL_SWEEP_START,
@@ -429,6 +502,9 @@ unlock:
int
ec_shd_full_healer_spawn(xlator_t *this, int subvol)
{
+ if (xlator_is_cleanup_starting(this))
+ return -1;
+
return ec_shd_healer_spawn(this, NTH_FULL_HEALER(this, subvol),
ec_shd_full_healer);
}
@@ -436,6 +512,9 @@ ec_shd_full_healer_spawn(xlator_t *this, int subvol)
int
ec_shd_index_healer_spawn(xlator_t *this, int subvol)
{
+ if (xlator_is_cleanup_starting(this))
+ return -1;
+
return ec_shd_healer_spawn(this, NTH_INDEX_HEALER(this, subvol),
ec_shd_index_healer);
}
@@ -562,3 +641,41 @@ out:
dict_del(output, this->name);
return ret;
}
+
+void
+ec_destroy_healer_object(xlator_t *this, struct subvol_healer *healer)
+{
+ if (!healer)
+ return;
+
+ pthread_cond_destroy(&healer->cond);
+ pthread_mutex_destroy(&healer->mutex);
+}
+
+void
+ec_selfheal_daemon_fini(xlator_t *this)
+{
+ struct subvol_healer *healer = NULL;
+ ec_self_heald_t *shd = NULL;
+ ec_t *priv = NULL;
+ int i = 0;
+
+ priv = this->private;
+ if (!priv)
+ return;
+
+ shd = &priv->shd;
+ if (!shd->iamshd)
+ return;
+
+ for (i = 0; i < priv->nodes; i++) {
+ healer = &shd->index_healers[i];
+ ec_destroy_healer_object(this, healer);
+
+ healer = &shd->full_healers[i];
+ ec_destroy_healer_object(this, healer);
+ }
+
+ GF_FREE(shd->index_healers);
+ GF_FREE(shd->full_healers);
+}
diff --git a/xlators/cluster/ec/src/ec-heald.h b/xlators/cluster/ec/src/ec-heald.h
index 4d141d767e5..6c7da4edc10 100644
--- a/xlators/cluster/ec/src/ec-heald.h
+++ b/xlators/cluster/ec/src/ec-heald.h
@@ -11,9 +11,9 @@
#ifndef __EC_HEALD_H__
#define __EC_HEALD_H__
-#include "xlator.h"
-
-#include "ec-types.h"
+#include "ec-types.h" // for ec_t
+#include "glusterfs/dict.h" // for dict_t
+#include "glusterfs/globals.h" // for xlator_t
int
ec_xl_op(xlator_t *this, dict_t *input, dict_t *output);
@@ -24,4 +24,7 @@ ec_selfheal_daemon_init(xlator_t *this);
void
ec_shd_index_healer_wake(ec_t *ec);
+void
+ec_selfheal_daemon_fini(xlator_t *this);
+
#endif /* __EC_HEALD_H__ */
diff --git a/xlators/cluster/ec/src/ec-helpers.c b/xlators/cluster/ec/src/ec-helpers.c
index dec39b9d2aa..48f54475e01 100644
--- a/xlators/cluster/ec/src/ec-helpers.c
+++ b/xlators/cluster/ec/src/ec-helpers.c
@@ -10,7 +10,7 @@
#include <libgen.h>
-#include "byte-order.h"
+#include <glusterfs/byte-order.h>
#include "ec.h"
#include "ec-mem-types.h"
@@ -476,7 +476,7 @@ out:
int32_t
ec_loc_setup_path(xlator_t *xl, loc_t *loc)
{
- uuid_t root = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1};
+ static uuid_t root = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1};
char *name;
int32_t ret = -EINVAL;
@@ -717,6 +717,7 @@ __ec_inode_get(inode_t *inode, xlator_t *xl)
memset(ctx, 0, sizeof(*ctx));
INIT_LIST_HEAD(&ctx->heal);
INIT_LIST_HEAD(&ctx->stripe_cache.lru);
+ ctx->heal_count = 0;
value = (uint64_t)(uintptr_t)ctx;
if (__inode_ctx_set(inode, xl, &value) != 0) {
GF_FREE(ctx);
@@ -752,6 +753,7 @@ __ec_fd_get(fd_t *fd, xlator_t *xl)
{
int i = 0;
ec_fd_t *ctx = NULL;
+ ec_inode_t *ictx = NULL;
uint64_t value = 0;
ec_t *ec = xl->private;
@@ -774,6 +776,12 @@ __ec_fd_get(fd_t *fd, xlator_t *xl)
GF_FREE(ctx);
return NULL;
}
+ /* Only referring to bad-version so no need for lock
+ * */
+ ictx = __ec_inode_get(fd->inode, xl);
+ if (ictx) {
+ ctx->bad_version = ictx->bad_version;
+ }
}
} else {
ctx = (ec_fd_t *)(uintptr_t)value;
diff --git a/xlators/cluster/ec/src/ec-inode-read.c b/xlators/cluster/ec/src/ec-inode-read.c
index c02d5401439..dad5f4d7018 100644
--- a/xlators/cluster/ec/src/ec-inode-read.c
+++ b/xlators/cluster/ec/src/ec-inode-read.c
@@ -8,9 +8,6 @@
cases as published by the Free Software Foundation.
*/
-#include "xlator.h"
-#include "defaults.h"
-
#include "ec.h"
#include "ec-messages.h"
#include "ec-helpers.h"
@@ -135,7 +132,7 @@ ec_manager_access(ec_fop_data_t *fop, int32_t state)
void
ec_access(call_frame_t *frame, xlator_t *this, uintptr_t target,
- int32_t minimum, fop_access_cbk_t func, void *data, loc_t *loc,
+ uint32_t fop_flags, fop_access_cbk_t func, void *data, loc_t *loc,
int32_t mask, dict_t *xdata)
{
ec_cbk_t callback = {.access = func};
@@ -149,7 +146,7 @@ ec_access(call_frame_t *frame, xlator_t *this, uintptr_t target,
GF_VALIDATE_OR_GOTO(this->name, this->private, out);
fop = ec_fop_data_allocate(frame, this, GF_FOP_ACCESS, EC_FLAG_LOCK_SHARED,
- target, minimum, ec_wind_access,
+ target, fop_flags, ec_wind_access,
ec_manager_access, callback, data);
if (fop == NULL) {
goto out;
@@ -393,15 +390,34 @@ ec_manager_getxattr(ec_fop_data_t *fop, int32_t state)
int32_t
ec_getxattr_heal_cbk(call_frame_t *frame, void *cookie, xlator_t *xl,
int32_t op_ret, int32_t op_errno, uintptr_t mask,
- uintptr_t good, uintptr_t bad, dict_t *xdata)
+ uintptr_t good, uintptr_t bad, uint32_t pending,
+ dict_t *xdata)
{
- ec_fop_data_t *fop = cookie;
- fop_getxattr_cbk_t func = fop->data;
+ fop_getxattr_cbk_t func = cookie;
ec_t *ec = xl->private;
dict_t *dict = NULL;
char *str;
char bin1[65], bin2[65];
+ /* We try to return the 'pending' information in xdata, but if this cannot
+ * be set, we will ignore it silently. We prefer to report the success or
+ * failure of the heal itself. */
+ if (xdata == NULL) {
+ xdata = dict_new();
+ } else {
+ dict_ref(xdata);
+ }
+ if (xdata != NULL) {
+ if (dict_set_uint32(xdata, EC_XATTR_HEAL_NEW, pending) != 0) {
+ /* dict_set_uint32() is marked as 'warn_unused_result' and gcc
+ * enforces to check the result in this case. However we don't
+ * really care if it succeeded or not. We'll just do the same.
+ *
+ * This empty 'if' avoids the warning, and it will be removed by
+ * the optimizer. */
+ }
+ }
+
if (op_ret >= 0) {
dict = dict_new();
if (dict == NULL) {
@@ -435,18 +451,21 @@ ec_getxattr_heal_cbk(call_frame_t *frame, void *cookie, xlator_t *xl,
}
out:
- func(frame, NULL, xl, op_ret, op_errno, dict, NULL);
+ func(frame, NULL, xl, op_ret, op_errno, dict, xdata);
if (dict != NULL) {
dict_unref(dict);
}
+ if (xdata != NULL) {
+ dict_unref(xdata);
+ }
return 0;
}
void
ec_getxattr(call_frame_t *frame, xlator_t *this, uintptr_t target,
- int32_t minimum, fop_getxattr_cbk_t func, void *data, loc_t *loc,
+ uint32_t fop_flags, fop_getxattr_cbk_t func, void *data, loc_t *loc,
const char *name, dict_t *xdata)
{
ec_cbk_t callback = {.getxattr = func};
@@ -468,7 +487,7 @@ ec_getxattr(call_frame_t *frame, xlator_t *this, uintptr_t target,
}
fop = ec_fop_data_allocate(
- frame, this, GF_FOP_GETXATTR, EC_FLAG_LOCK_SHARED, target, minimum,
+ frame, this, GF_FOP_GETXATTR, EC_FLAG_LOCK_SHARED, target, fop_flags,
ec_wind_getxattr, ec_manager_getxattr, callback, data);
if (fop == NULL) {
goto out;
@@ -588,7 +607,7 @@ ec_wind_fgetxattr(ec_t *ec, ec_fop_data_t *fop, int32_t idx)
void
ec_fgetxattr(call_frame_t *frame, xlator_t *this, uintptr_t target,
- int32_t minimum, fop_fgetxattr_cbk_t func, void *data, fd_t *fd,
+ uint32_t fop_flags, fop_fgetxattr_cbk_t func, void *data, fd_t *fd,
const char *name, dict_t *xdata)
{
ec_cbk_t callback = {.fgetxattr = func};
@@ -602,7 +621,7 @@ ec_fgetxattr(call_frame_t *frame, xlator_t *this, uintptr_t target,
GF_VALIDATE_OR_GOTO(this->name, this->private, out);
fop = ec_fop_data_allocate(
- frame, this, GF_FOP_FGETXATTR, EC_FLAG_LOCK_SHARED, target, minimum,
+ frame, this, GF_FOP_FGETXATTR, EC_FLAG_LOCK_SHARED, target, fop_flags,
ec_wind_fgetxattr, ec_manager_getxattr, callback, data);
if (fop == NULL) {
goto out;
@@ -774,13 +793,15 @@ ec_manager_open(ec_fop_data_t *fop, int32_t state)
return EC_STATE_REPORT;
}
- err = ec_loc_from_loc(fop->xl, &ctx->loc, &fop->loc[0]);
- if (err != 0) {
- UNLOCK(&fop->fd->lock);
+ if (!ctx->loc.inode) {
+ err = ec_loc_from_loc(fop->xl, &ctx->loc, &fop->loc[0]);
+ if (err != 0) {
+ UNLOCK(&fop->fd->lock);
- fop->error = -err;
+ fop->error = -err;
- return EC_STATE_REPORT;
+ return EC_STATE_REPORT;
+ }
}
ctx->flags = fop->int32;
@@ -869,9 +890,9 @@ ec_manager_open(ec_fop_data_t *fop, int32_t state)
}
void
-ec_open(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum,
- fop_open_cbk_t func, void *data, loc_t *loc, int32_t flags, fd_t *fd,
- dict_t *xdata)
+ec_open(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_open_cbk_t func, void *data, loc_t *loc,
+ int32_t flags, fd_t *fd, dict_t *xdata)
{
ec_cbk_t callback = {.open = func};
ec_fop_data_t *fop = NULL;
@@ -884,7 +905,7 @@ ec_open(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum,
GF_VALIDATE_OR_GOTO(this->name, this->private, out);
fop = ec_fop_data_allocate(frame, this, GF_FOP_OPEN, EC_FLAG_LOCK_SHARED,
- target, minimum, ec_wind_open, ec_manager_open,
+ target, fop_flags, ec_wind_open, ec_manager_open,
callback, data);
if (fop == NULL) {
goto out;
@@ -1071,7 +1092,7 @@ ec_manager_readlink(ec_fop_data_t *fop, int32_t state)
void
ec_readlink(call_frame_t *frame, xlator_t *this, uintptr_t target,
- int32_t minimum, fop_readlink_cbk_t func, void *data, loc_t *loc,
+ uint32_t fop_flags, fop_readlink_cbk_t func, void *data, loc_t *loc,
size_t size, dict_t *xdata)
{
ec_cbk_t callback = {.readlink = func};
@@ -1085,7 +1106,7 @@ ec_readlink(call_frame_t *frame, xlator_t *this, uintptr_t target,
GF_VALIDATE_OR_GOTO(this->name, this->private, out);
fop = ec_fop_data_allocate(
- frame, this, GF_FOP_READLINK, EC_FLAG_LOCK_SHARED, target, minimum,
+ frame, this, GF_FOP_READLINK, EC_FLAG_LOCK_SHARED, target, fop_flags,
ec_wind_readlink, ec_manager_readlink, callback, data);
if (fop == NULL) {
goto out;
@@ -1331,6 +1352,7 @@ int32_t
ec_manager_readv(ec_fop_data_t *fop, int32_t state)
{
ec_cbk_data_t *cbk;
+ ec_t *ec = fop->xl->private;
switch (state) {
case EC_STATE_INIT:
@@ -1350,6 +1372,9 @@ ec_manager_readv(ec_fop_data_t *fop, int32_t state)
return EC_STATE_DISPATCH;
case EC_STATE_DISPATCH:
+ if (ec->read_mask) {
+ fop->mask &= ec->read_mask;
+ }
ec_dispatch_min(fop);
return EC_STATE_PREPARE_ANSWER;
@@ -1417,9 +1442,9 @@ ec_manager_readv(ec_fop_data_t *fop, int32_t state)
}
void
-ec_readv(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum,
- fop_readv_cbk_t func, void *data, fd_t *fd, size_t size, off_t offset,
- uint32_t flags, dict_t *xdata)
+ec_readv(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_readv_cbk_t func, void *data, fd_t *fd,
+ size_t size, off_t offset, uint32_t flags, dict_t *xdata)
{
ec_cbk_t callback = {.readv = func};
ec_fop_data_t *fop = NULL;
@@ -1432,8 +1457,8 @@ ec_readv(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum,
GF_VALIDATE_OR_GOTO(this->name, this->private, out);
fop = ec_fop_data_allocate(frame, this, GF_FOP_READ, EC_FLAG_LOCK_SHARED,
- target, minimum, ec_wind_readv, ec_manager_readv,
- callback, data);
+ target, fop_flags, ec_wind_readv,
+ ec_manager_readv, callback, data);
if (fop == NULL) {
goto out;
}
@@ -1637,9 +1662,9 @@ ec_manager_seek(ec_fop_data_t *fop, int32_t state)
}
void
-ec_seek(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum,
- fop_seek_cbk_t func, void *data, fd_t *fd, off_t offset,
- gf_seek_what_t what, dict_t *xdata)
+ec_seek(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_seek_cbk_t func, void *data, fd_t *fd,
+ off_t offset, gf_seek_what_t what, dict_t *xdata)
{
ec_cbk_t callback = {.seek = func};
ec_fop_data_t *fop = NULL;
@@ -1652,7 +1677,7 @@ ec_seek(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum,
GF_VALIDATE_OR_GOTO(this->name, this->private, out);
fop = ec_fop_data_allocate(frame, this, GF_FOP_SEEK, EC_FLAG_LOCK_SHARED,
- target, minimum, ec_wind_seek, ec_manager_seek,
+ target, fop_flags, ec_wind_seek, ec_manager_seek,
callback, data);
if (fop == NULL) {
goto out;
@@ -1855,8 +1880,9 @@ ec_manager_stat(ec_fop_data_t *fop, int32_t state)
}
void
-ec_stat(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum,
- fop_stat_cbk_t func, void *data, loc_t *loc, dict_t *xdata)
+ec_stat(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_stat_cbk_t func, void *data, loc_t *loc,
+ dict_t *xdata)
{
ec_cbk_t callback = {.stat = func};
ec_fop_data_t *fop = NULL;
@@ -1869,7 +1895,7 @@ ec_stat(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum,
GF_VALIDATE_OR_GOTO(this->name, this->private, out);
fop = ec_fop_data_allocate(frame, this, GF_FOP_STAT, EC_FLAG_LOCK_SHARED,
- target, minimum, ec_wind_stat, ec_manager_stat,
+ target, fop_flags, ec_wind_stat, ec_manager_stat,
callback, data);
if (fop == NULL) {
goto out;
@@ -1965,8 +1991,9 @@ ec_wind_fstat(ec_t *ec, ec_fop_data_t *fop, int32_t idx)
}
void
-ec_fstat(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum,
- fop_fstat_cbk_t func, void *data, fd_t *fd, dict_t *xdata)
+ec_fstat(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_fstat_cbk_t func, void *data, fd_t *fd,
+ dict_t *xdata)
{
ec_cbk_t callback = {.fstat = func};
ec_fop_data_t *fop = NULL;
@@ -1979,8 +2006,8 @@ ec_fstat(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum,
GF_VALIDATE_OR_GOTO(this->name, this->private, out);
fop = ec_fop_data_allocate(frame, this, GF_FOP_FSTAT, EC_FLAG_LOCK_SHARED,
- target, minimum, ec_wind_fstat, ec_manager_stat,
- callback, data);
+ target, fop_flags, ec_wind_fstat,
+ ec_manager_stat, callback, data);
if (fop == NULL) {
goto out;
}
diff --git a/xlators/cluster/ec/src/ec-inode-write.c b/xlators/cluster/ec/src/ec-inode-write.c
index b915a992a00..9b5fe2a7fdc 100644
--- a/xlators/cluster/ec/src/ec-inode-write.c
+++ b/xlators/cluster/ec/src/ec-inode-write.c
@@ -8,10 +8,6 @@
cases as published by the Free Software Foundation.
*/
-#include "xlator.h"
-#include "defaults.h"
-
-#include "ec.h"
#include "ec-messages.h"
#include "ec-helpers.h"
#include "ec-common.h"
@@ -89,6 +85,8 @@ ec_update_write(ec_fop_data_t *fop, uintptr_t mask, off_t offset, uint64_t size)
goto out;
}
+ if (fop->locks[0].lock)
+ ec_lock_update_good(fop->locks[0].lock, fop);
vector.iov_base = iobuf->ptr;
vector.iov_len = size;
memset(vector.iov_base, 0, vector.iov_len);
@@ -183,26 +181,26 @@ ec_xattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
switch (fop->id) {
case GF_FOP_SETXATTR:
if (fop->cbks.setxattr) {
- fop->cbks.setxattr(frame, cookie, this, op_ret, op_errno,
- xdata);
+ QUORUM_CBK(fop->cbks.setxattr, fop, frame, cookie, this, op_ret,
+ op_errno, xdata);
}
break;
case GF_FOP_REMOVEXATTR:
if (fop->cbks.removexattr) {
- fop->cbks.removexattr(frame, cookie, this, op_ret, op_errno,
- xdata);
+ QUORUM_CBK(fop->cbks.removexattr, fop, frame, cookie, this,
+ op_ret, op_errno, xdata);
}
break;
case GF_FOP_FSETXATTR:
if (fop->cbks.fsetxattr) {
- fop->cbks.fsetxattr(frame, cookie, this, op_ret, op_errno,
- xdata);
+ QUORUM_CBK(fop->cbks.fsetxattr, fop, frame, cookie, this,
+ op_ret, op_errno, xdata);
}
break;
case GF_FOP_FREMOVEXATTR:
if (fop->cbks.fremovexattr) {
- fop->cbks.fremovexattr(frame, cookie, this, op_ret, op_errno,
- xdata);
+ QUORUM_CBK(fop->cbks.fremovexattr, fop, frame, cookie, this,
+ op_ret, op_errno, xdata);
}
break;
}
@@ -281,7 +279,7 @@ ec_manager_xattr(ec_fop_data_t *fop, int32_t state)
void
ec_removexattr(call_frame_t *frame, xlator_t *this, uintptr_t target,
- int32_t minimum, fop_removexattr_cbk_t func, void *data,
+ uint32_t fop_flags, fop_removexattr_cbk_t func, void *data,
loc_t *loc, const char *name, dict_t *xdata)
{
ec_cbk_t callback = {.removexattr = func};
@@ -295,7 +293,7 @@ ec_removexattr(call_frame_t *frame, xlator_t *this, uintptr_t target,
GF_VALIDATE_OR_GOTO(this->name, this->private, out);
fop = ec_fop_data_allocate(frame, this, GF_FOP_REMOVEXATTR, 0, target,
- minimum, ec_wind_removexattr, ec_manager_xattr,
+ fop_flags, ec_wind_removexattr, ec_manager_xattr,
callback, data);
if (fop == NULL) {
goto out;
@@ -361,7 +359,7 @@ ec_wind_fremovexattr(ec_t *ec, ec_fop_data_t *fop, int32_t idx)
void
ec_fremovexattr(call_frame_t *frame, xlator_t *this, uintptr_t target,
- int32_t minimum, fop_fremovexattr_cbk_t func, void *data,
+ uint32_t fop_flags, fop_fremovexattr_cbk_t func, void *data,
fd_t *fd, const char *name, dict_t *xdata)
{
ec_cbk_t callback = {.fremovexattr = func};
@@ -375,8 +373,8 @@ ec_fremovexattr(call_frame_t *frame, xlator_t *this, uintptr_t target,
GF_VALIDATE_OR_GOTO(this->name, this->private, out);
fop = ec_fop_data_allocate(frame, this, GF_FOP_FREMOVEXATTR, 0, target,
- minimum, ec_wind_fremovexattr, ec_manager_xattr,
- callback, data);
+ fop_flags, ec_wind_fremovexattr,
+ ec_manager_xattr, callback, data);
if (fop == NULL) {
goto out;
}
@@ -492,16 +490,15 @@ ec_manager_setattr(ec_fop_data_t *fop, int32_t state)
if (fop->id == GF_FOP_SETATTR) {
if (fop->cbks.setattr != NULL) {
- fop->cbks.setattr(fop->req_frame, fop, fop->xl, cbk->op_ret,
- cbk->op_errno, &cbk->iatt[0],
- &cbk->iatt[1], cbk->xdata);
+ QUORUM_CBK(fop->cbks.setattr, fop, fop->req_frame, fop,
+ fop->xl, cbk->op_ret, cbk->op_errno,
+ &cbk->iatt[0], &cbk->iatt[1], cbk->xdata);
}
} else {
if (fop->cbks.fsetattr != NULL) {
- fop->cbks.fsetattr(fop->req_frame, fop, fop->xl,
- cbk->op_ret, cbk->op_errno,
- &cbk->iatt[0], &cbk->iatt[1],
- cbk->xdata);
+ QUORUM_CBK(fop->cbks.fsetattr, fop, fop->req_frame, fop,
+ fop->xl, cbk->op_ret, cbk->op_errno,
+ &cbk->iatt[0], &cbk->iatt[1], cbk->xdata);
}
}
@@ -550,7 +547,7 @@ ec_manager_setattr(ec_fop_data_t *fop, int32_t state)
void
ec_setattr(call_frame_t *frame, xlator_t *this, uintptr_t target,
- int32_t minimum, fop_setattr_cbk_t func, void *data, loc_t *loc,
+ uint32_t fop_flags, fop_setattr_cbk_t func, void *data, loc_t *loc,
struct iatt *stbuf, int32_t valid, dict_t *xdata)
{
ec_cbk_t callback = {.setattr = func};
@@ -563,9 +560,9 @@ ec_setattr(call_frame_t *frame, xlator_t *this, uintptr_t target,
GF_VALIDATE_OR_GOTO(this->name, frame, out);
GF_VALIDATE_OR_GOTO(this->name, this->private, out);
- fop = ec_fop_data_allocate(frame, this, GF_FOP_SETATTR, 0, target, minimum,
- ec_wind_setattr, ec_manager_setattr, callback,
- data);
+ fop = ec_fop_data_allocate(frame, this, GF_FOP_SETATTR, 0, target,
+ fop_flags, ec_wind_setattr, ec_manager_setattr,
+ callback, data);
if (fop == NULL) {
goto out;
}
@@ -627,7 +624,7 @@ ec_wind_fsetattr(ec_t *ec, ec_fop_data_t *fop, int32_t idx)
void
ec_fsetattr(call_frame_t *frame, xlator_t *this, uintptr_t target,
- int32_t minimum, fop_fsetattr_cbk_t func, void *data, fd_t *fd,
+ uint32_t fop_flags, fop_fsetattr_cbk_t func, void *data, fd_t *fd,
struct iatt *stbuf, int32_t valid, dict_t *xdata)
{
ec_cbk_t callback = {.fsetattr = func};
@@ -640,9 +637,9 @@ ec_fsetattr(call_frame_t *frame, xlator_t *this, uintptr_t target,
GF_VALIDATE_OR_GOTO(this->name, frame, out);
GF_VALIDATE_OR_GOTO(this->name, this->private, out);
- fop = ec_fop_data_allocate(frame, this, GF_FOP_FSETATTR, 0, target, minimum,
- ec_wind_fsetattr, ec_manager_setattr, callback,
- data);
+ fop = ec_fop_data_allocate(frame, this, GF_FOP_FSETATTR, 0, target,
+ fop_flags, ec_wind_fsetattr, ec_manager_setattr,
+ callback, data);
if (fop == NULL) {
goto out;
}
@@ -707,7 +704,7 @@ ec_wind_setxattr(ec_t *ec, ec_fop_data_t *fop, int32_t idx)
void
ec_setxattr(call_frame_t *frame, xlator_t *this, uintptr_t target,
- int32_t minimum, fop_setxattr_cbk_t func, void *data, loc_t *loc,
+ uint32_t fop_flags, fop_setxattr_cbk_t func, void *data, loc_t *loc,
dict_t *dict, int32_t flags, dict_t *xdata)
{
ec_cbk_t callback = {.setxattr = func};
@@ -720,9 +717,9 @@ ec_setxattr(call_frame_t *frame, xlator_t *this, uintptr_t target,
GF_VALIDATE_OR_GOTO(this->name, frame, out);
GF_VALIDATE_OR_GOTO(this->name, this->private, out);
- fop = ec_fop_data_allocate(frame, this, GF_FOP_SETXATTR, 0, target, minimum,
- ec_wind_setxattr, ec_manager_xattr, callback,
- data);
+ fop = ec_fop_data_allocate(frame, this, GF_FOP_SETXATTR, 0, target,
+ fop_flags, ec_wind_setxattr, ec_manager_xattr,
+ callback, data);
if (fop == NULL) {
goto out;
}
@@ -825,7 +822,7 @@ ec_wind_fsetxattr(ec_t *ec, ec_fop_data_t *fop, int32_t idx)
void
ec_fsetxattr(call_frame_t *frame, xlator_t *this, uintptr_t target,
- int32_t minimum, fop_fsetxattr_cbk_t func, void *data, fd_t *fd,
+ uint32_t fop_flags, fop_fsetxattr_cbk_t func, void *data, fd_t *fd,
dict_t *dict, int32_t flags, dict_t *xdata)
{
ec_cbk_t callback = {.fsetxattr = func};
@@ -839,7 +836,7 @@ ec_fsetxattr(call_frame_t *frame, xlator_t *this, uintptr_t target,
GF_VALIDATE_OR_GOTO(this->name, this->private, out);
fop = ec_fop_data_allocate(frame, this, GF_FOP_FSETXATTR, 0, target,
- minimum, ec_wind_fsetxattr, ec_manager_xattr,
+ fop_flags, ec_wind_fsetxattr, ec_manager_xattr,
callback, data);
if (fop == NULL) {
goto out;
@@ -992,9 +989,9 @@ ec_manager_fallocate(ec_fop_data_t *fop, int32_t state)
GF_ASSERT(cbk != NULL);
if (fop->cbks.fallocate != NULL) {
- fop->cbks.fallocate(fop->req_frame, fop, fop->xl, cbk->op_ret,
- cbk->op_errno, &cbk->iatt[0], &cbk->iatt[1],
- cbk->xdata);
+ QUORUM_CBK(fop->cbks.fallocate, fop, fop->req_frame, fop,
+ fop->xl, cbk->op_ret, cbk->op_errno, &cbk->iatt[0],
+ &cbk->iatt[1], cbk->xdata);
}
return EC_STATE_LOCK_REUSE;
@@ -1035,7 +1032,7 @@ ec_manager_fallocate(ec_fop_data_t *fop, int32_t state)
void
ec_fallocate(call_frame_t *frame, xlator_t *this, uintptr_t target,
- int32_t minimum, fop_fallocate_cbk_t func, void *data, fd_t *fd,
+ uint32_t fop_flags, fop_fallocate_cbk_t func, void *data, fd_t *fd,
int32_t mode, off_t offset, size_t len, dict_t *xdata)
{
ec_cbk_t callback = {.fallocate = func};
@@ -1049,8 +1046,8 @@ ec_fallocate(call_frame_t *frame, xlator_t *this, uintptr_t target,
GF_VALIDATE_OR_GOTO(this->name, this->private, out);
fop = ec_fop_data_allocate(frame, this, GF_FOP_FALLOCATE, 0, target,
- minimum, ec_wind_fallocate, ec_manager_fallocate,
- callback, data);
+ fop_flags, ec_wind_fallocate,
+ ec_manager_fallocate, callback, data);
if (fop == NULL) {
goto out;
}
@@ -1209,8 +1206,8 @@ ec_manager_discard(ec_fop_data_t *fop, int32_t state)
ec_dispatch_all(fop);
return EC_STATE_DELAYED_START;
} else {
- /*Assume discard to have succeeded on mask*/
- fop->good = fop->mask;
+ /* Assume discard to have succeeded on all bricks */
+ ec_succeed_all(fop);
}
/* Fall through */
@@ -1245,9 +1242,9 @@ ec_manager_discard(ec_fop_data_t *fop, int32_t state)
GF_ASSERT(cbk != NULL);
if (fop->cbks.discard != NULL) {
- fop->cbks.discard(fop->req_frame, fop, fop->xl, cbk->op_ret,
- cbk->op_errno, &cbk->iatt[0], &cbk->iatt[1],
- cbk->xdata);
+ QUORUM_CBK(fop->cbks.discard, fop, fop->req_frame, fop, fop->xl,
+ cbk->op_ret, cbk->op_errno, &cbk->iatt[0],
+ &cbk->iatt[1], cbk->xdata);
}
return EC_STATE_LOCK_REUSE;
@@ -1289,7 +1286,7 @@ ec_manager_discard(ec_fop_data_t *fop, int32_t state)
void
ec_discard(call_frame_t *frame, xlator_t *this, uintptr_t target,
- int32_t minimum, fop_discard_cbk_t func, void *data, fd_t *fd,
+ uint32_t fop_flags, fop_discard_cbk_t func, void *data, fd_t *fd,
off_t offset, size_t len, dict_t *xdata)
{
ec_cbk_t callback = {.discard = func};
@@ -1302,9 +1299,9 @@ ec_discard(call_frame_t *frame, xlator_t *this, uintptr_t target,
GF_VALIDATE_OR_GOTO(this->name, frame, out);
GF_VALIDATE_OR_GOTO(this->name, this->private, out);
- fop = ec_fop_data_allocate(frame, this, GF_FOP_DISCARD, 0, target, minimum,
- ec_wind_discard, ec_manager_discard, callback,
- data);
+ fop = ec_fop_data_allocate(frame, this, GF_FOP_DISCARD, 0, target,
+ fop_flags, ec_wind_discard, ec_manager_discard,
+ callback, data);
if (fop == NULL) {
goto out;
}
@@ -1405,6 +1402,7 @@ int32_t
ec_manager_truncate(ec_fop_data_t *fop, int32_t state)
{
ec_cbk_data_t *cbk;
+ off_t offset_down;
switch (state) {
case EC_STATE_INIT:
@@ -1416,16 +1414,19 @@ ec_manager_truncate(ec_fop_data_t *fop, int32_t state)
/* Fall through */
case EC_STATE_LOCK:
+ offset_down = fop->user_size;
+ ec_adjust_offset_down(fop->xl->private, &offset_down, _gf_true);
+
if (fop->id == GF_FOP_TRUNCATE) {
ec_lock_prepare_inode(
fop, &fop->loc[0],
EC_UPDATE_DATA | EC_UPDATE_META | EC_QUERY_INFO,
- fop->offset, EC_RANGE_FULL);
+ offset_down, EC_RANGE_FULL);
} else {
ec_lock_prepare_fd(
fop, fop->fd,
EC_UPDATE_DATA | EC_UPDATE_META | EC_QUERY_INFO,
- fop->offset, EC_RANGE_FULL);
+ offset_down, EC_RANGE_FULL);
}
ec_lock(fop);
@@ -1471,17 +1472,15 @@ ec_manager_truncate(ec_fop_data_t *fop, int32_t state)
if (fop->id == GF_FOP_TRUNCATE) {
if (fop->cbks.truncate != NULL) {
- fop->cbks.truncate(fop->req_frame, fop, fop->xl,
- cbk->op_ret, cbk->op_errno,
- &cbk->iatt[0], &cbk->iatt[1],
- cbk->xdata);
+ QUORUM_CBK(fop->cbks.truncate, fop, fop->req_frame, fop,
+ fop->xl, cbk->op_ret, cbk->op_errno,
+ &cbk->iatt[0], &cbk->iatt[1], cbk->xdata);
}
} else {
if (fop->cbks.ftruncate != NULL) {
- fop->cbks.ftruncate(fop->req_frame, fop, fop->xl,
- cbk->op_ret, cbk->op_errno,
- &cbk->iatt[0], &cbk->iatt[1],
- cbk->xdata);
+ QUORUM_CBK(fop->cbks.ftruncate, fop, fop->req_frame, fop,
+ fop->xl, cbk->op_ret, cbk->op_errno,
+ &cbk->iatt[0], &cbk->iatt[1], cbk->xdata);
}
}
@@ -1530,7 +1529,7 @@ ec_manager_truncate(ec_fop_data_t *fop, int32_t state)
void
ec_truncate(call_frame_t *frame, xlator_t *this, uintptr_t target,
- int32_t minimum, fop_truncate_cbk_t func, void *data, loc_t *loc,
+ uint32_t fop_flags, fop_truncate_cbk_t func, void *data, loc_t *loc,
off_t offset, dict_t *xdata)
{
ec_cbk_t callback = {.truncate = func};
@@ -1543,9 +1542,9 @@ ec_truncate(call_frame_t *frame, xlator_t *this, uintptr_t target,
GF_VALIDATE_OR_GOTO(this->name, frame, out);
GF_VALIDATE_OR_GOTO(this->name, this->private, out);
- fop = ec_fop_data_allocate(frame, this, GF_FOP_TRUNCATE, 0, target, minimum,
- ec_wind_truncate, ec_manager_truncate, callback,
- data);
+ fop = ec_fop_data_allocate(frame, this, GF_FOP_TRUNCATE, 0, target,
+ fop_flags, ec_wind_truncate, ec_manager_truncate,
+ callback, data);
if (fop == NULL) {
goto out;
}
@@ -1604,7 +1603,7 @@ ec_wind_ftruncate(ec_t *ec, ec_fop_data_t *fop, int32_t idx)
void
ec_ftruncate(call_frame_t *frame, xlator_t *this, uintptr_t target,
- int32_t minimum, fop_ftruncate_cbk_t func, void *data, fd_t *fd,
+ uint32_t fop_flags, fop_ftruncate_cbk_t func, void *data, fd_t *fd,
off_t offset, dict_t *xdata)
{
ec_cbk_t callback = {.ftruncate = func};
@@ -1618,8 +1617,8 @@ ec_ftruncate(call_frame_t *frame, xlator_t *this, uintptr_t target,
GF_VALIDATE_OR_GOTO(this->name, this->private, out);
fop = ec_fop_data_allocate(frame, this, GF_FOP_FTRUNCATE, 0, target,
- minimum, ec_wind_ftruncate, ec_manager_truncate,
- callback, data);
+ fop_flags, ec_wind_ftruncate,
+ ec_manager_truncate, callback, data);
if (fop == NULL) {
goto out;
}
@@ -1973,6 +1972,23 @@ ec_get_and_merge_stripe(ec_t *ec, ec_fop_data_t *fop, ec_stripe_part_t which)
return found;
}
+static uintptr_t
+ec_get_lock_good_mask(inode_t *inode, xlator_t *xl)
+{ /* Return good_mask of the lock referenced by the inode's context, else 0. */
+ ec_lock_t *lock = NULL; /* remains NULL if no inode context is found */
+ ec_inode_t *ictx = NULL;
+ LOCK(&inode->lock); /* the context lookup is done with inode->lock held */
+ {
+ ictx = __ec_inode_get(inode, xl);
+ if (ictx)
+ lock = ictx->inode_lock;
+ }
+ UNLOCK(&inode->lock);
+ if (lock)
+ return lock->good_mask; /* read after inode->lock has been released */
+ return 0; /* no context, or the context has no inode_lock set */
+}
+
void
ec_writev_start(ec_fop_data_t *fop)
{
@@ -2009,20 +2025,29 @@ ec_writev_start(ec_fop_data_t *fop)
if (err != 0) {
goto failed_fd;
}
+ tail = fop->size - fop->user_size - fop->head;
if (fop->head > 0) {
- found_stripe = ec_get_and_merge_stripe(ec, fop, EC_STRIPE_HEAD);
- if (!found_stripe) {
- if (ec_make_internal_fop_xdata(&xdata)) {
- err = -ENOMEM;
- goto failed_xdata;
+ if (current > fop->offset) {
+ found_stripe = ec_get_and_merge_stripe(ec, fop, EC_STRIPE_HEAD);
+ if (!found_stripe) {
+ if (ec_make_internal_fop_xdata(&xdata)) {
+ err = -ENOMEM;
+ goto failed_xdata;
+ }
+ ec_readv(fop->frame, fop->xl,
+ ec_get_lock_good_mask(fop->fd->inode, fop->xl),
+ EC_MINIMUM_MIN, ec_writev_merge_head, NULL, fd,
+ ec->stripe_size, fop->offset, 0, xdata);
+ }
+ } else {
+ memset(fop->vector[0].iov_base, 0, fop->head);
+ memset(fop->vector[0].iov_base + fop->size - tail, 0, tail);
+ if (ec->stripe_cache && (fop->size <= ec->stripe_size)) {
+ ec_add_stripe_in_cache(ec, fop);
}
- ec_readv(fop->frame, fop->xl, -1, EC_MINIMUM_MIN,
- ec_writev_merge_head, NULL, fd, ec->stripe_size,
- fop->offset, 0, xdata);
}
}
- tail = fop->size - fop->user_size - fop->head;
if ((tail > 0) && ((fop->head == 0) || (fop->size > ec->stripe_size))) {
/* Current locking scheme will make sure the 'current' below will
* never decrease while the fop is in progress, so the checks will
@@ -2035,8 +2060,10 @@ ec_writev_start(ec_fop_data_t *fop)
err = -ENOMEM;
goto failed_xdata;
}
- ec_readv(fop->frame, fop->xl, -1, EC_MINIMUM_MIN,
- ec_writev_merge_tail, NULL, fd, ec->stripe_size,
+ ec_readv(fop->frame, fop->xl,
+ ec_get_lock_good_mask(fop->fd->inode, fop->xl),
+ EC_MINIMUM_MIN, ec_writev_merge_tail, NULL, fd,
+ ec->stripe_size,
fop->offset + fop->size - ec->stripe_size, 0, xdata);
}
} else {
@@ -2211,9 +2238,9 @@ ec_manager_writev(ec_fop_data_t *fop, int32_t state)
GF_ASSERT(cbk != NULL);
if (fop->cbks.writev != NULL) {
- fop->cbks.writev(fop->req_frame, fop, fop->xl, cbk->op_ret,
- cbk->op_errno, &cbk->iatt[0], &cbk->iatt[1],
- cbk->xdata);
+ QUORUM_CBK(fop->cbks.writev, fop, fop->req_frame, fop, fop->xl,
+ cbk->op_ret, cbk->op_errno, &cbk->iatt[0],
+ &cbk->iatt[1], cbk->xdata);
}
return EC_STATE_LOCK_REUSE;
@@ -2262,7 +2289,7 @@ ec_manager_writev(ec_fop_data_t *fop, int32_t state)
void
ec_writev(call_frame_t *frame, xlator_t *this, uintptr_t target,
- int32_t minimum, fop_writev_cbk_t func, void *data, fd_t *fd,
+ uint32_t fop_flags, fop_writev_cbk_t func, void *data, fd_t *fd,
struct iovec *vector, int32_t count, off_t offset, uint32_t flags,
struct iobref *iobref, dict_t *xdata)
{
@@ -2276,7 +2303,7 @@ ec_writev(call_frame_t *frame, xlator_t *this, uintptr_t target,
GF_VALIDATE_OR_GOTO(this->name, frame, out);
GF_VALIDATE_OR_GOTO(this->name, this->private, out);
- fop = ec_fop_data_allocate(frame, this, GF_FOP_WRITE, 0, target, minimum,
+ fop = ec_fop_data_allocate(frame, this, GF_FOP_WRITE, 0, target, fop_flags,
ec_wind_writev, ec_manager_writev, callback,
data);
if (fop == NULL) {
diff --git a/xlators/cluster/ec/src/ec-locks.c b/xlators/cluster/ec/src/ec-locks.c
index 58c9acb6ded..601960d6154 100644
--- a/xlators/cluster/ec/src/ec-locks.c
+++ b/xlators/cluster/ec/src/ec-locks.c
@@ -8,13 +8,9 @@
cases as published by the Free Software Foundation.
*/
-#include "xlator.h"
-#include "defaults.h"
-
#include "ec-helpers.h"
#include "ec-common.h"
#include "ec-combine.h"
-#include "ec-method.h"
#include "ec-fops.h"
#include "ec-messages.h"
@@ -28,9 +24,36 @@ ec_lock_check(ec_fop_data_t *fop, uintptr_t *mask)
ec_t *ec = fop->xl->private;
ec_cbk_data_t *ans = NULL;
ec_cbk_data_t *cbk = NULL;
- uintptr_t locked = 0, notlocked = 0;
+ uintptr_t locked = 0;
+ int32_t good = 0;
+ int32_t eagain = 0;
+ int32_t estale = 0;
int32_t error = -1;
+ /* There are some errors that we'll handle in a special way while trying
+ * to acquire a lock.
+ *
+ * EAGAIN: If it's found during a parallel non-blocking lock request, we
+ * consider that there's contention on the inode, so we consider
+ * the acquisition a failure and try again with a sequential
+ * blocking lock request. This will ensure that we get a lock on
+ * as many bricks as possible (ignoring EAGAIN here would cause
+ * unnecessary triggers of self-healing).
+ *
+ * If it's found during a sequential blocking lock request, it's
+ * considered an error. Lock will only succeed if there are
+ * enough other bricks locked.
+ *
+ * ESTALE: This can appear during parallel or sequential lock request if
+ * the inode has just been unlinked. We consider this error is
+ * not recoverable, but we also don't consider it as fatal. So,
+ * if it happens during parallel lock, we won't attempt a
+ * sequential one unless there are EAGAIN errors on other
+ * bricks (and are enough to form a quorum), but if we reach
+ * quorum counting the ESTALE bricks, we consider the whole
+ * result of the operation is ESTALE instead of EIO.
+ */
+
list_for_each_entry(ans, &fop->cbk_list, list)
{
if (ans->op_ret >= 0) {
@@ -38,24 +61,23 @@ ec_lock_check(ec_fop_data_t *fop, uintptr_t *mask)
error = EIO;
}
locked |= ans->mask;
+ good = ans->count;
cbk = ans;
- } else {
- if (ans->op_errno == EAGAIN) {
- switch (fop->uint32) {
- case EC_LOCK_MODE_NONE:
- case EC_LOCK_MODE_ALL:
- /* Goal is to treat non-blocking lock as failure
- * even if there is a single EAGAIN*/
- notlocked |= ans->mask;
- break;
- }
- }
+ } else if (ans->op_errno == ESTALE) {
+ estale += ans->count;
+ } else if ((ans->op_errno == EAGAIN) &&
+ (fop->uint32 != EC_LOCK_MODE_INC)) {
+ eagain += ans->count;
}
}
if (error == -1) {
- if (gf_bits_count(locked | notlocked) >= ec->fragments) {
- if (notlocked == 0) {
+ /* If we have enough quorum with succeeded and EAGAIN answers, we
+ * ignore for now any ESTALE answer. If there are EAGAIN answers,
+ * we retry with a sequential blocking lock request if needed.
+ * Otherwise we succeed. */
+ if ((good + eagain) >= ec->fragments) {
+ if (eagain == 0) {
if (fop->answer == NULL) {
fop->answer = cbk;
}
@@ -68,21 +90,28 @@ ec_lock_check(ec_fop_data_t *fop, uintptr_t *mask)
case EC_LOCK_MODE_NONE:
error = EAGAIN;
break;
-
case EC_LOCK_MODE_ALL:
fop->uint32 = EC_LOCK_MODE_INC;
break;
-
default:
+ /* This shouldn't happen because eagain cannot be > 0
+ * when fop->uint32 is EC_LOCK_MODE_INC. */
error = EIO;
break;
}
}
} else {
- if (fop->answer && fop->answer->op_ret < 0)
+ /* We have been unable to find enough candidates that will be able
+ * to take the lock. If we have quorum on some answer, we return
+ * it. Otherwise we check if ESTALE answers allow us to reach
+ * quorum. If so, we return ESTALE. */
+ if (fop->answer && fop->answer->op_ret < 0) {
error = fop->answer->op_errno;
- else
+ } else if ((good + eagain + estale) >= ec->fragments) {
+ error = ESTALE;
+ } else {
error = EIO;
+ }
}
}
@@ -275,7 +304,7 @@ ec_manager_entrylk(ec_fop_data_t *fop, int32_t state)
void
ec_entrylk(call_frame_t *frame, xlator_t *this, uintptr_t target,
- int32_t minimum, fop_entrylk_cbk_t func, void *data,
+ uint32_t fop_flags, fop_entrylk_cbk_t func, void *data,
const char *volume, loc_t *loc, const char *basename,
entrylk_cmd cmd, entrylk_type type, dict_t *xdata)
{
@@ -285,13 +314,12 @@ ec_entrylk(call_frame_t *frame, xlator_t *this, uintptr_t target,
gf_msg_trace("ec", 0, "EC(ENTRYLK) %p", frame);
- VALIDATE_OR_GOTO(this, out);
GF_VALIDATE_OR_GOTO(this->name, frame, out);
GF_VALIDATE_OR_GOTO(this->name, this->private, out);
- fop = ec_fop_data_allocate(frame, this, GF_FOP_ENTRYLK, 0, target, minimum,
- ec_wind_entrylk, ec_manager_entrylk, callback,
- data);
+ fop = ec_fop_data_allocate(frame, this, GF_FOP_ENTRYLK, 0, target,
+ fop_flags, ec_wind_entrylk, ec_manager_entrylk,
+ callback, data);
if (fop == NULL) {
goto out;
}
@@ -404,7 +432,7 @@ ec_wind_fentrylk(ec_t *ec, ec_fop_data_t *fop, int32_t idx)
void
ec_fentrylk(call_frame_t *frame, xlator_t *this, uintptr_t target,
- int32_t minimum, fop_fentrylk_cbk_t func, void *data,
+ uint32_t fop_flags, fop_fentrylk_cbk_t func, void *data,
const char *volume, fd_t *fd, const char *basename, entrylk_cmd cmd,
entrylk_type type, dict_t *xdata)
{
@@ -417,9 +445,9 @@ ec_fentrylk(call_frame_t *frame, xlator_t *this, uintptr_t target,
GF_VALIDATE_OR_GOTO(this->name, frame, out);
GF_VALIDATE_OR_GOTO(this->name, this->private, out);
- fop = ec_fop_data_allocate(frame, this, GF_FOP_FENTRYLK, 0, target, minimum,
- ec_wind_fentrylk, ec_manager_entrylk, callback,
- data);
+ fop = ec_fop_data_allocate(frame, this, GF_FOP_FENTRYLK, 0, target,
+ fop_flags, ec_wind_fentrylk, ec_manager_entrylk,
+ callback, data);
if (fop == NULL) {
goto out;
}
@@ -651,7 +679,7 @@ ec_manager_inodelk(ec_fop_data_t *fop, int32_t state)
void
ec_inodelk(call_frame_t *frame, xlator_t *this, gf_lkowner_t *owner,
- uintptr_t target, int32_t minimum, fop_inodelk_cbk_t func,
+ uintptr_t target, uint32_t fop_flags, fop_inodelk_cbk_t func,
void *data, const char *volume, loc_t *loc, int32_t cmd,
struct gf_flock *flock, dict_t *xdata)
{
@@ -665,9 +693,9 @@ ec_inodelk(call_frame_t *frame, xlator_t *this, gf_lkowner_t *owner,
GF_VALIDATE_OR_GOTO(this->name, frame, out);
GF_VALIDATE_OR_GOTO(this->name, this->private, out);
- fop = ec_fop_data_allocate(frame, this, GF_FOP_INODELK, 0, target, minimum,
- ec_wind_inodelk, ec_manager_inodelk, callback,
- data);
+ fop = ec_fop_data_allocate(frame, this, GF_FOP_INODELK, 0, target,
+ fop_flags, ec_wind_inodelk, ec_manager_inodelk,
+ callback, data);
if (fop == NULL) {
goto out;
}
@@ -783,7 +811,7 @@ ec_wind_finodelk(ec_t *ec, ec_fop_data_t *fop, int32_t idx)
void
ec_finodelk(call_frame_t *frame, xlator_t *this, gf_lkowner_t *owner,
- uintptr_t target, int32_t minimum, fop_finodelk_cbk_t func,
+ uintptr_t target, uint32_t fop_flags, fop_finodelk_cbk_t func,
void *data, const char *volume, fd_t *fd, int32_t cmd,
struct gf_flock *flock, dict_t *xdata)
{
@@ -797,9 +825,9 @@ ec_finodelk(call_frame_t *frame, xlator_t *this, gf_lkowner_t *owner,
GF_VALIDATE_OR_GOTO(this->name, frame, out);
GF_VALIDATE_OR_GOTO(this->name, this->private, out);
- fop = ec_fop_data_allocate(frame, this, GF_FOP_FINODELK, 0, target, minimum,
- ec_wind_finodelk, ec_manager_inodelk, callback,
- data);
+ fop = ec_fop_data_allocate(frame, this, GF_FOP_FINODELK, 0, target,
+ fop_flags, ec_wind_finodelk, ec_manager_inodelk,
+ callback, data);
if (fop == NULL) {
goto out;
}
@@ -1033,7 +1061,7 @@ ec_manager_lk(ec_fop_data_t *fop, int32_t state)
}
void
-ec_lk(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum,
+ec_lk(call_frame_t *frame, xlator_t *this, uintptr_t target, uint32_t fop_flags,
fop_lk_cbk_t func, void *data, fd_t *fd, int32_t cmd,
struct gf_flock *flock, dict_t *xdata)
{
@@ -1046,7 +1074,7 @@ ec_lk(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum,
GF_VALIDATE_OR_GOTO(this->name, frame, out);
GF_VALIDATE_OR_GOTO(this->name, this->private, out);
- fop = ec_fop_data_allocate(frame, this, GF_FOP_LK, 0, target, minimum,
+ fop = ec_fop_data_allocate(frame, this, GF_FOP_LK, 0, target, fop_flags,
ec_wind_lk, ec_manager_lk, callback, data);
if (fop == NULL) {
goto out;
diff --git a/xlators/cluster/ec/src/ec-mem-types.h b/xlators/cluster/ec/src/ec-mem-types.h
index fc33d09ea33..3252c4c1c58 100644
--- a/xlators/cluster/ec/src/ec-mem-types.h
+++ b/xlators/cluster/ec/src/ec-mem-types.h
@@ -11,14 +11,13 @@
#ifndef __EC_MEM_TYPES_H__
#define __EC_MEM_TYPES_H__
-#include "mem-types.h"
+#include <glusterfs/mem-types.h>
enum gf_ec_mem_types_ {
ec_mt_ec_t = gf_common_mt_end + 1,
ec_mt_xlator_t,
ec_mt_ec_inode_t,
ec_mt_ec_fd_t,
- ec_mt_ec_heal_t,
ec_mt_subvol_healer_t,
ec_mt_ec_gf_t,
ec_mt_ec_code_t,
diff --git a/xlators/cluster/ec/src/ec-messages.h b/xlators/cluster/ec/src/ec-messages.h
index 5f673d69aa4..72e98f11286 100644
--- a/xlators/cluster/ec/src/ec-messages.h
+++ b/xlators/cluster/ec/src/ec-messages.h
@@ -11,7 +11,7 @@
#ifndef _EC_MESSAGES_H_
#define _EC_MESSAGES_H_
-#include "glfs-message-id.h"
+#include <glusterfs/glfs-message-id.h>
/* To add new message IDs, append new identifiers at the end of the list.
*
@@ -55,6 +55,7 @@ GLFS_MSGID(EC, EC_MSG_INVALID_CONFIG, EC_MSG_HEAL_FAIL,
EC_MSG_CONFIG_XATTR_INVALID, EC_MSG_EXTENSION, EC_MSG_EXTENSION_NONE,
EC_MSG_EXTENSION_UNKNOWN, EC_MSG_EXTENSION_UNSUPPORTED,
EC_MSG_EXTENSION_FAILED, EC_MSG_NO_GF, EC_MSG_MATRIX_FAILED,
- EC_MSG_DYN_CREATE_FAILED, EC_MSG_DYN_CODEGEN_FAILED);
+ EC_MSG_DYN_CREATE_FAILED, EC_MSG_DYN_CODEGEN_FAILED,
+ EC_MSG_THREAD_CLEANUP_FAILED, EC_MSG_FD_BAD);
#endif /* !_EC_MESSAGES_H_ */
diff --git a/xlators/cluster/ec/src/ec-method.h b/xlators/cluster/ec/src/ec-method.h
index ca33f4ffdce..f91233b2f88 100644
--- a/xlators/cluster/ec/src/ec-method.h
+++ b/xlators/cluster/ec/src/ec-method.h
@@ -11,8 +11,6 @@
#ifndef __EC_METHOD_H__
#define __EC_METHOD_H__
-#include "xlator.h"
-
#include "ec-types.h"
#include "ec-galois.h"
diff --git a/xlators/cluster/ec/src/ec-types.h b/xlators/cluster/ec/src/ec-types.h
index 80d9c0d4014..de9b89bb2c9 100644
--- a/xlators/cluster/ec/src/ec-types.h
+++ b/xlators/cluster/ec/src/ec-types.h
@@ -11,10 +11,10 @@
#ifndef __EC_TYPES_H__
#define __EC_TYPES_H__
-#include "xlator.h"
-#include "timer.h"
+#include <glusterfs/xlator.h>
+#include <glusterfs/timer.h>
#include "libxlator.h"
-#include "atomic.h"
+#include <glusterfs/atomic.h>
#define EC_GF_MAX_REGS 16
@@ -130,7 +130,12 @@ typedef void (*ec_resume_f)(ec_fop_data_t *, int32_t);
enum _ec_read_policy { EC_ROUND_ROBIN, EC_GFID_HASH, EC_READ_POLICY_MAX };
-enum _ec_heal_need { EC_HEAL_NONEED, EC_HEAL_MAYBE, EC_HEAL_MUST };
+enum _ec_heal_need {
+ EC_HEAL_NONEED,
+ EC_HEAL_MAYBE,
+ EC_HEAL_MUST,
+ EC_HEAL_PURGE_INDEX
+};
enum _ec_stripe_part { EC_STRIPE_HEAD, EC_STRIPE_TAIL };
@@ -150,6 +155,7 @@ struct _ec_fd {
loc_t loc;
uintptr_t open;
int32_t flags;
+ uint64_t bad_version;
ec_fd_status_t fd_status[0];
};
@@ -171,6 +177,7 @@ struct _ec_inode {
gf_boolean_t have_config;
gf_boolean_t have_version;
gf_boolean_t have_size;
+ int32_t heal_count;
ec_config_t config;
uint64_t pre_version[2];
uint64_t post_version[2];
@@ -179,14 +186,15 @@ struct _ec_inode {
uint64_t dirty[2];
struct list_head heal;
ec_stripe_list_t stripe_cache;
+ uint64_t bad_version;
};
typedef int32_t (*fop_heal_cbk_t)(call_frame_t *, void *, xlator_t *, int32_t,
int32_t, uintptr_t, uintptr_t, uintptr_t,
- dict_t *);
+ uint32_t, dict_t *);
typedef int32_t (*fop_fheal_cbk_t)(call_frame_t *, void *, xlator_t *, int32_t,
int32_t, uintptr_t, uintptr_t, uintptr_t,
- dict_t *);
+ uint32_t, dict_t *);
union _ec_cbk {
fop_access_cbk_t access;
@@ -264,6 +272,7 @@ struct _ec_lock {
uint32_t refs_pending; /* Refs assigned to fops being prepared */
uint32_t waiting_flags; /*Track xattrop/dirty marking*/
gf_boolean_t acquired;
+ gf_boolean_t contention;
gf_boolean_t unlock_now;
gf_boolean_t release;
gf_boolean_t query;
@@ -307,9 +316,9 @@ struct _ec_fop_data {
int32_t id; /* ID of the file operation */
int32_t refs;
int32_t state;
- int32_t minimum; /* Minimum number of successful
- operation required to conclude a
- fop as successful */
+ uint32_t minimum; /* Minimum number of successful
+ operation required to conclude a
+ fop as successful */
int32_t expected;
int32_t winds;
int32_t jobs;
@@ -324,11 +333,12 @@ struct _ec_fop_data {
ec_cbk_data_t *answer; /* accepted answer */
int32_t lock_count;
int32_t locked;
+ gf_lock_t lock;
ec_lock_link_t locks[2];
int32_t first_lock;
- gf_lock_t lock;
- uint32_t flags;
+ uint32_t fop_flags; /* Flags passed by the caller. */
+ uint32_t flags; /* Internal flags. */
uint32_t first;
uintptr_t mask;
uintptr_t healing; /*Dispatch is done but call is successful only
@@ -616,6 +626,11 @@ struct _ec_statistics {
requests. (Basically memory allocation
errors). */
} stripe_cache;
+ struct {
+ gf_atomic_t attempted; /*Number of heals attempted on
+ files/directories*/
+ gf_atomic_t completed; /*Number of heals completed on files/directories*/
+ } shd;
};
struct _ec {
@@ -641,6 +656,8 @@ struct _ec {
uintptr_t xl_notify; /* Bit flag representing
notification for bricks. */
uintptr_t node_mask;
+ uintptr_t read_mask; /*Stores user defined read-mask*/
+ gf_atomic_t async_fop_count; /* Number of on going asynchronous fops. */
xlator_t **xl_list;
gf_lock_t lock;
gf_timer_t *timer;
@@ -650,6 +667,7 @@ struct _ec {
gf_boolean_t optimistic_changelog;
gf_boolean_t parallel_writes;
uint32_t stripe_cache;
+ uint32_t quorum_count;
uint32_t background_heals;
uint32_t heal_wait_qlen;
uint32_t self_heal_window_size; /* max size of read/writes */
diff --git a/xlators/cluster/ec/src/ec.c b/xlators/cluster/ec/src/ec.c
index b56dd9ada3e..7344be4968d 100644
--- a/xlators/cluster/ec/src/ec.c
+++ b/xlators/cluster/ec/src/ec.c
@@ -8,10 +8,10 @@
cases as published by the Free Software Foundation.
*/
-#include "defaults.h"
-#include "statedump.h"
-#include "compat-errno.h"
-#include "upcall-utils.h"
+#include <glusterfs/defaults.h>
+#include <glusterfs/statedump.h>
+#include <glusterfs/compat-errno.h>
+#include <glusterfs/upcall-utils.h>
#include "ec.h"
#include "ec-messages.h"
@@ -23,7 +23,7 @@
#include "ec-method.h"
#include "ec-code.h"
#include "ec-heald.h"
-#include "events.h"
+#include <glusterfs/events.h>
static char *ec_read_policies[EC_READ_POLICY_MAX + 1] = {
[EC_ROUND_ROBIN] = "round-robin",
@@ -285,6 +285,7 @@ reconfigure(xlator_t *this, dict_t *options)
GF_OPTION_RECONF("parallel-writes", ec->parallel_writes, options, bool,
failed);
GF_OPTION_RECONF("stripe-cache", ec->stripe_cache, options, uint32, failed);
+ GF_OPTION_RECONF("quorum-count", ec->quorum_count, options, uint32, failed);
ret = 0;
if (ec_assign_read_policy(ec, read_policy)) {
ret = -1;
@@ -324,13 +325,18 @@ ec_get_event_from_state(ec_t *ec)
void
ec_up(xlator_t *this, ec_t *ec)
{
+ char str1[32], str2[32];
+
if (ec->timer != NULL) {
gf_timer_call_cancel(this->ctx, ec->timer);
ec->timer = NULL;
}
ec->up = 1;
- gf_msg(this->name, GF_LOG_INFO, 0, EC_MSG_EC_UP, "Going UP");
+ gf_msg(this->name, GF_LOG_INFO, 0, EC_MSG_EC_UP,
+ "Going UP : Child UP = %s Child Notify = %s",
+ ec_bin(str1, sizeof(str1), ec->xl_up, ec->nodes),
+ ec_bin(str2, sizeof(str2), ec->xl_notify, ec->nodes));
gf_event(EVENT_EC_MIN_BRICKS_UP, "subvol=%s", this->name);
}
@@ -338,13 +344,18 @@ ec_up(xlator_t *this, ec_t *ec)
void
ec_down(xlator_t *this, ec_t *ec)
{
+ char str1[32], str2[32];
+
if (ec->timer != NULL) {
gf_timer_call_cancel(this->ctx, ec->timer);
ec->timer = NULL;
}
ec->up = 0;
- gf_msg(this->name, GF_LOG_INFO, 0, EC_MSG_EC_DOWN, "Going DOWN");
+ gf_msg(this->name, GF_LOG_INFO, 0, EC_MSG_EC_DOWN,
+ "Going DOWN : Child UP = %s Child Notify = %s",
+ ec_bin(str1, sizeof(str1), ec->xl_up, ec->nodes),
+ ec_bin(str2, sizeof(str2), ec->xl_notify, ec->nodes));
gf_event(EVENT_EC_MIN_BRICKS_NOT_UP, "subvol=%s", this->name);
}
@@ -355,6 +366,7 @@ ec_notify_cbk(void *data)
ec_t *ec = data;
glusterfs_event_t event = GF_EVENT_MAXVAL;
gf_boolean_t propagate = _gf_false;
+ gf_boolean_t launch_heal = _gf_false;
LOCK(&ec->lock);
{
@@ -384,6 +396,11 @@ ec_notify_cbk(void *data)
* still bricks DOWN, they will be healed when they
* come up. */
ec_up(ec->xl, ec);
+
+ if (ec->shd.iamshd && !ec->shutdown) {
+ launch_heal = _gf_true;
+ GF_ATOMIC_INC(ec->async_fop_count);
+ }
}
propagate = _gf_true;
@@ -391,13 +408,12 @@ ec_notify_cbk(void *data)
unlock:
UNLOCK(&ec->lock);
+ if (launch_heal) {
+ /* We have just brought the volume UP, so we trigger
+ * a self-heal check on the root directory. */
+ ec_launch_replace_heal(ec);
+ }
if (propagate) {
- if ((event == GF_EVENT_CHILD_UP) && ec->shd.iamshd) {
- /* We have just brought the volume UP, so we trigger
- * a self-heal check on the root directory. */
- ec_launch_replace_heal(ec);
- }
-
default_notify(ec->xl, event, NULL);
}
}
@@ -425,10 +441,55 @@ ec_disable_delays(ec_t *ec)
{
ec->shutdown = _gf_true;
- return list_empty(&ec->pending_fops);
+ return __ec_is_last_fop(ec);
}
void
+ec_cleanup_healer_object(ec_t *ec)
+{
+ struct subvol_healer *healer = NULL;
+ ec_self_heald_t *shd = NULL;
+ void *res = NULL;
+ int i = 0;
+ gf_boolean_t is_join = _gf_false;
+
+ shd = &ec->shd;
+ if (!shd->iamshd)
+ return;
+
+ for (i = 0; i < ec->nodes; i++) {
+ healer = &shd->index_healers[i];
+ pthread_mutex_lock(&healer->mutex);
+ {
+ healer->rerun = 1;
+ if (healer->running) {
+ pthread_cond_signal(&healer->cond);
+ is_join = _gf_true;
+ }
+ }
+ pthread_mutex_unlock(&healer->mutex);
+ if (is_join) {
+ pthread_join(healer->thread, &res);
+ is_join = _gf_false;
+ }
+
+ healer = &shd->full_healers[i];
+ pthread_mutex_lock(&healer->mutex);
+ {
+ healer->rerun = 1;
+ if (healer->running) {
+ pthread_cond_signal(&healer->cond);
+ is_join = _gf_true;
+ }
+ }
+ pthread_mutex_unlock(&healer->mutex);
+ if (is_join) {
+ pthread_join(healer->thread, &res);
+ is_join = _gf_false;
+ }
+ }
+}
+void
ec_pending_fops_completed(ec_t *ec)
{
if (ec->shutdown) {
@@ -441,6 +502,9 @@ ec_set_up_state(ec_t *ec, uintptr_t index_mask, uintptr_t new_state)
{
uintptr_t current_state = 0;
+ if (xlator_is_cleanup_starting(ec->xl))
+ return _gf_false;
+
if ((ec->xl_notify & index_mask) == 0) {
ec->xl_notify |= index_mask;
ec->xl_notify_count++;
@@ -462,6 +526,7 @@ ec_upcall(ec_t *ec, struct gf_upcall *upcall)
struct gf_upcall_cache_invalidation *ci = NULL;
struct gf_upcall_inodelk_contention *lc = NULL;
inode_t *inode;
+ inode_table_t *table;
switch (upcall->event_type) {
case GF_UPCALL_CACHE_INVALIDATION:
@@ -475,8 +540,18 @@ ec_upcall(ec_t *ec, struct gf_upcall *upcall)
/* The lock is not owned by EC, ignore it. */
return _gf_true;
}
- inode = inode_find(((xlator_t *)ec->xl->graph->top)->itable,
- upcall->gfid);
+ table = ((xlator_t *)ec->xl->graph->top)->itable;
+ if (table == NULL) {
+ /* Self-heal daemon doesn't have an inode table on the top
+ * xlator because it doesn't need it. In this case we should
+ * use the inode table managed by EC itself where all inodes
+ * being healed should be present. However self-heal doesn't
+ * use eager-locking and inodelk's are already released as
+ * soon as possible. In this case we can safely ignore these
+ * notifications. */
+ return _gf_false;
+ }
+ inode = inode_find(table, upcall->gfid);
/* If inode is not found, it means that it's already released,
* so we can ignore it. Probably it has been released and
* destroyed while the contention notification was being sent.
@@ -544,6 +619,7 @@ ec_notify(xlator_t *this, int32_t event, void *data, void *data2)
/* If there aren't pending fops running after we have waken up
* them, we immediately propagate the notification. */
propagate = ec_disable_delays(ec);
+ ec_cleanup_healer_object(ec);
goto unlock;
}
@@ -554,7 +630,10 @@ ec_notify(xlator_t *this, int32_t event, void *data, void *data2)
if (event == GF_EVENT_CHILD_UP) {
/* We need to trigger a selfheal if a brick changes
* to UP state. */
- needs_shd_check = ec_set_up_state(ec, mask, mask);
+ if (ec_set_up_state(ec, mask, mask) && ec->shd.iamshd &&
+ !ec->shutdown) {
+ needs_shd_check = _gf_true;
+ }
} else if (event == GF_EVENT_CHILD_DOWN) {
ec_set_up_state(ec, mask, 0);
}
@@ -584,17 +663,21 @@ ec_notify(xlator_t *this, int32_t event, void *data, void *data2)
}
} else {
propagate = _gf_false;
+ needs_shd_check = _gf_false;
+ }
+
+ if (needs_shd_check) {
+ GF_ATOMIC_INC(ec->async_fop_count);
}
}
unlock:
UNLOCK(&ec->lock);
done:
+ if (needs_shd_check) {
+ ec_launch_replace_heal(ec);
+ }
if (propagate) {
- if (needs_shd_check && ec->shd.iamshd) {
- ec_launch_replace_heal(ec);
- }
-
error = default_notify(this, event, data);
}
@@ -627,6 +710,69 @@ ec_statistics_init(ec_t *ec)
GF_ATOMIC_INIT(ec->stats.stripe_cache.evicts, 0);
GF_ATOMIC_INIT(ec->stats.stripe_cache.allocs, 0);
GF_ATOMIC_INIT(ec->stats.stripe_cache.errors, 0);
+ GF_ATOMIC_INIT(ec->stats.shd.attempted, 0);
+ GF_ATOMIC_INIT(ec->stats.shd.completed, 0);
+}
+
+static int
+ec_assign_read_mask(ec_t *ec, char *read_mask_str)
+{
+ char *mask = NULL;
+ char *maskptr = NULL;
+ char *saveptr = NULL;
+ char *id_str = NULL;
+ int id = 0;
+ int ret = 0;
+ uintptr_t read_mask = 0;
+
+ if (!read_mask_str) {
+ ec->read_mask = 0;
+ ret = 0;
+ goto out;
+ }
+
+ mask = gf_strdup(read_mask_str);
+ if (!mask) {
+ ret = -1;
+ goto out;
+ }
+ maskptr = mask;
+
+ for (;;) {
+ id_str = strtok_r(maskptr, ":", &saveptr);
+ if (id_str == NULL)
+ break;
+ if (gf_string2int(id_str, &id)) {
+ gf_msg(ec->xl->name, GF_LOG_ERROR, 0, EC_MSG_XLATOR_INIT_FAIL,
+ "In read-mask \"%s\" id %s is not a valid integer",
+ read_mask_str, id_str);
+ ret = -1;
+ goto out;
+ }
+
+ if ((id < 0) || (id >= ec->nodes)) {
+ gf_msg(ec->xl->name, GF_LOG_ERROR, 0, EC_MSG_XLATOR_INIT_FAIL,
+ "In read-mask \"%s\" id %d is not in range [0 - %d]",
+ read_mask_str, id, ec->nodes - 1);
+ ret = -1;
+ goto out;
+ }
+ read_mask |= (1UL << id);
+ maskptr = NULL;
+ }
+
+ if (gf_bits_count(read_mask) < ec->fragments) {
+ gf_msg(ec->xl->name, GF_LOG_ERROR, 0, EC_MSG_XLATOR_INIT_FAIL,
+ "read-mask \"%s\" should contain at least %d ids", read_mask_str,
+ ec->fragments);
+ ret = -1;
+ goto out;
+ }
+ ec->read_mask = read_mask;
+ ret = 0;
+out:
+ GF_FREE(mask);
+ return ret;
}
int32_t
@@ -636,6 +782,7 @@ init(xlator_t *this)
char *read_policy = NULL;
char *extensions = NULL;
int32_t err;
+ char *read_mask_str = NULL;
if (this->parents == NULL) {
gf_msg(this->name, GF_LOG_WARNING, 0, EC_MSG_NO_PARENTS,
@@ -656,6 +803,7 @@ init(xlator_t *this)
ec->xl = this;
LOCK_INIT(&ec->lock);
+ GF_ATOMIC_INIT(ec->async_fop_count, 0);
INIT_LIST_HEAD(&ec->pending_fops);
INIT_LIST_HEAD(&ec->heal_waiting);
INIT_LIST_HEAD(&ec->healing);
@@ -714,12 +862,18 @@ init(xlator_t *this)
if (ec_assign_read_policy(ec, read_policy))
goto failed;
+ GF_OPTION_INIT("heal-timeout", ec->shd.timeout, int32, failed);
GF_OPTION_INIT("shd-max-threads", ec->shd.max_threads, uint32, failed);
GF_OPTION_INIT("shd-wait-qlength", ec->shd.wait_qlength, uint32, failed);
GF_OPTION_INIT("optimistic-change-log", ec->optimistic_changelog, bool,
failed);
GF_OPTION_INIT("parallel-writes", ec->parallel_writes, bool, failed);
GF_OPTION_INIT("stripe-cache", ec->stripe_cache, uint32, failed);
+ GF_OPTION_INIT("quorum-count", ec->quorum_count, uint32, failed);
+ GF_OPTION_INIT("ec-read-mask", read_mask_str, str, failed);
+
+ if (ec_assign_read_mask(ec, read_mask_str))
+ goto failed;
this->itable = inode_table_new(EC_SHD_INODE_LRU_LIMIT, this);
if (!this->itable)
@@ -759,6 +913,7 @@ failed:
void
fini(xlator_t *this)
{
+ ec_selfheal_daemon_fini(this);
__ec_destroy_private(this);
}
@@ -797,11 +952,12 @@ ec_gf_entrylk(call_frame_t *frame, xlator_t *this, const char *volume,
loc_t *loc, const char *basename, entrylk_cmd cmd,
entrylk_type type, dict_t *xdata)
{
- int32_t minimum = EC_MINIMUM_ALL;
+ uint32_t fop_flags = EC_MINIMUM_ALL;
+
if (cmd == ENTRYLK_UNLOCK)
- minimum = EC_MINIMUM_ONE;
- ec_entrylk(frame, this, -1, minimum, default_entrylk_cbk, NULL, volume, loc,
- basename, cmd, type, xdata);
+ fop_flags = EC_MINIMUM_ONE;
+ ec_entrylk(frame, this, -1, fop_flags, default_entrylk_cbk, NULL, volume,
+ loc, basename, cmd, type, xdata);
return 0;
}
@@ -811,10 +967,11 @@ ec_gf_fentrylk(call_frame_t *frame, xlator_t *this, const char *volume,
fd_t *fd, const char *basename, entrylk_cmd cmd,
entrylk_type type, dict_t *xdata)
{
- int32_t minimum = EC_MINIMUM_ALL;
+ uint32_t fop_flags = EC_MINIMUM_ALL;
+
if (cmd == ENTRYLK_UNLOCK)
- minimum = EC_MINIMUM_ONE;
- ec_fentrylk(frame, this, -1, minimum, default_fentrylk_cbk, NULL, volume,
+ fop_flags = EC_MINIMUM_ONE;
+ ec_fentrylk(frame, this, -1, fop_flags, default_fentrylk_cbk, NULL, volume,
fd, basename, cmd, type, xdata);
return 0;
@@ -905,7 +1062,7 @@ ec_gf_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
{
int error = 0;
ec_t *ec = this->private;
- int32_t minimum = EC_MINIMUM_ONE;
+ int32_t fop_flags = EC_MINIMUM_ONE;
if (name && strcmp(name, EC_XATTR_HEAL) != 0) {
EC_INTERNAL_XATTR_OR_GOTO(name, NULL, error, out);
@@ -920,11 +1077,11 @@ ec_gf_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
if (name && ((fnmatch(GF_XATTR_STIME_PATTERN, name, 0) == 0) ||
XATTR_IS_NODE_UUID(name) || XATTR_IS_NODE_UUID_LIST(name))) {
- minimum = EC_MINIMUM_ALL;
+ fop_flags = EC_MINIMUM_ALL;
}
- ec_getxattr(frame, this, -1, minimum, default_getxattr_cbk, NULL, loc, name,
- xdata);
+ ec_getxattr(frame, this, -1, fop_flags, default_getxattr_cbk, NULL, loc,
+ name, xdata);
return 0;
out:
@@ -954,11 +1111,12 @@ int32_t
ec_gf_inodelk(call_frame_t *frame, xlator_t *this, const char *volume,
loc_t *loc, int32_t cmd, struct gf_flock *flock, dict_t *xdata)
{
- int32_t minimum = EC_MINIMUM_ALL;
+ int32_t fop_flags = EC_MINIMUM_ALL;
+
if (flock->l_type == F_UNLCK)
- minimum = EC_MINIMUM_ONE;
+ fop_flags = EC_MINIMUM_ONE;
- ec_inodelk(frame, this, &frame->root->lk_owner, -1, minimum,
+ ec_inodelk(frame, this, &frame->root->lk_owner, -1, fop_flags,
default_inodelk_cbk, NULL, volume, loc, cmd, flock, xdata);
return 0;
@@ -968,10 +1126,11 @@ int32_t
ec_gf_finodelk(call_frame_t *frame, xlator_t *this, const char *volume,
fd_t *fd, int32_t cmd, struct gf_flock *flock, dict_t *xdata)
{
- int32_t minimum = EC_MINIMUM_ALL;
+ int32_t fop_flags = EC_MINIMUM_ALL;
+
if (flock->l_type == F_UNLCK)
- minimum = EC_MINIMUM_ONE;
- ec_finodelk(frame, this, &frame->root->lk_owner, -1, minimum,
+ fop_flags = EC_MINIMUM_ONE;
+ ec_finodelk(frame, this, &frame->root->lk_owner, -1, fop_flags,
default_finodelk_cbk, NULL, volume, fd, cmd, flock, xdata);
return 0;
@@ -991,10 +1150,11 @@ int32_t
ec_gf_lk(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd,
struct gf_flock *flock, dict_t *xdata)
{
- int32_t minimum = EC_MINIMUM_ALL;
+ int32_t fop_flags = EC_MINIMUM_ALL;
+
if (flock->l_type == F_UNLCK)
- minimum = EC_MINIMUM_ONE;
- ec_lk(frame, this, -1, minimum, default_lk_cbk, NULL, fd, cmd, flock,
+ fop_flags = EC_MINIMUM_ONE;
+ ec_lk(frame, this, -1, fop_flags, default_lk_cbk, NULL, fd, cmd, flock,
xdata);
return 0;
@@ -1389,6 +1549,10 @@ ec_dump_private(xlator_t *this)
gf_proc_dump_write("childs_up", "%u", ec->xl_up_count);
gf_proc_dump_write("childs_up_mask", "%s",
ec_bin(tmp, sizeof(tmp), ec->xl_up, ec->nodes));
+ if (ec->read_mask) {
+ gf_proc_dump_write("read-mask", "%s",
+ ec_bin(tmp, sizeof(tmp), ec->read_mask, ec->nodes));
+ }
gf_proc_dump_write("background-heals", "%d", ec->background_heals);
gf_proc_dump_write("heal-wait-qlength", "%d", ec->heal_wait_qlen);
gf_proc_dump_write("self-heal-window-size", "%" PRIu32,
@@ -1397,6 +1561,7 @@ ec_dump_private(xlator_t *this)
gf_proc_dump_write("heal-waiters", "%d", ec->heal_waiters);
gf_proc_dump_write("read-policy", "%s", ec_read_policies[ec->read_policy]);
gf_proc_dump_write("parallel-writes", "%d", ec->parallel_writes);
+ gf_proc_dump_write("quorum-count", "%u", ec->quorum_count);
snprintf(key_prefix, GF_DUMP_MAX_BUF_LEN, "%s.%s.stats.stripe_cache",
this->type, this->name);
@@ -1416,6 +1581,10 @@ ec_dump_private(xlator_t *this)
GF_ATOMIC_GET(ec->stats.stripe_cache.allocs));
gf_proc_dump_write("errors", "%" GF_PRI_ATOMIC,
GF_ATOMIC_GET(ec->stats.stripe_cache.errors));
+ gf_proc_dump_write("heals-attempted", "%" GF_PRI_ATOMIC,
+ GF_ATOMIC_GET(ec->stats.shd.attempted));
+ gf_proc_dump_write("heals-completed", "%" GF_PRI_ATOMIC,
+ GF_ATOMIC_GET(ec->stats.shd.completed));
return 0;
}
@@ -1666,4 +1835,39 @@ struct volume_options options[] = {
"specially for sequential writes. However, this will also"
"lead to extra memory consumption, maximum "
"(cache size * stripe size) Bytes per open file."},
- {.key = {NULL}}};
+ {
+ .key = {"quorum-count"},
+ .type = GF_OPTION_TYPE_INT,
+ .default_value = "0",
+ .description =
+ "This option can be used to define how many successes on"
+ "the bricks constitute a success to the application. This"
+ " count should be in the range"
+ "[disperse-data-count, disperse-count] (inclusive)",
+ },
+ {
+ .key = {"ec-read-mask"},
+ .type = GF_OPTION_TYPE_STR,
+ .default_value = NULL,
+ .description = "This option can be used to choose which bricks can be"
+ " used for reading data/metadata of a file/directory",
+ },
+ {
+ .key = {NULL},
+ },
+};
+
+xlator_api_t xlator_api = {
+ .init = init,
+ .fini = fini,
+ .notify = notify,
+ .reconfigure = reconfigure,
+ .mem_acct_init = mem_acct_init,
+ .op_version = {1},
+ .dumpops = &dumpops,
+ .fops = &fops,
+ .cbks = &cbks,
+ .options = options,
+ .identifier = "disperse",
+ .category = GF_MAINTAINED,
+};
diff --git a/xlators/cluster/ec/src/ec.h b/xlators/cluster/ec/src/ec.h
index 1b210d9adc1..6f6de6d5981 100644
--- a/xlators/cluster/ec/src/ec.h
+++ b/xlators/cluster/ec/src/ec.h
@@ -18,6 +18,7 @@
#define EC_XATTR_SIZE EC_XATTR_PREFIX "size"
#define EC_XATTR_VERSION EC_XATTR_PREFIX "version"
#define EC_XATTR_HEAL EC_XATTR_PREFIX "heal"
+#define EC_XATTR_HEAL_NEW EC_XATTR_PREFIX "heal-new"
#define EC_XATTR_DIRTY EC_XATTR_PREFIX "dirty"
#define EC_STRIPE_CACHE_MAX_SIZE 10
#define EC_VERSION_SIZE 2
diff --git a/xlators/cluster/stripe/src/Makefile.am b/xlators/cluster/stripe/src/Makefile.am
deleted file mode 100644
index 2b594567db1..00000000000
--- a/xlators/cluster/stripe/src/Makefile.am
+++ /dev/null
@@ -1,22 +0,0 @@
-xlator_LTLIBRARIES = stripe.la
-xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/cluster
-
-stripe_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS)
-
-
-stripe_la_SOURCES = stripe.c stripe-helpers.c \
- $(top_builddir)/xlators/lib/src/libxlator.c
-
-stripe_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
-
-noinst_HEADERS = stripe.h stripe-mem-types.h \
- $(top_builddir)/xlators/lib/src/libxlator.h
-
-AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \
- -I$(top_srcdir)/xlators/lib/src \
- -I$(top_srcdir)/rpc/xdr/src -I$(top_builddir)/rpc/xdr/src
-
-AM_CFLAGS = -Wall $(GF_CFLAGS)
-
-CLEANFILES =
-
diff --git a/xlators/cluster/stripe/src/stripe-helpers.c b/xlators/cluster/stripe/src/stripe-helpers.c
deleted file mode 100644
index c3743723d1d..00000000000
--- a/xlators/cluster/stripe/src/stripe-helpers.c
+++ /dev/null
@@ -1,658 +0,0 @@
-/*
- Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-#include <fnmatch.h>
-
-#include "stripe.h"
-#include "byte-order.h"
-#include "mem-types.h"
-#include "logging.h"
-
-void
-stripe_local_wipe(stripe_local_t *local)
-{
- if (!local)
- goto out;
-
- loc_wipe(&local->loc);
- loc_wipe(&local->loc2);
-
- if (local->fd)
- fd_unref(local->fd);
-
- if (local->inode)
- inode_unref(local->inode);
-
- if (local->xattr)
- dict_unref(local->xattr);
-
- if (local->xdata)
- dict_unref(local->xdata);
-
-out:
- return;
-}
-
-int
-stripe_aggregate(dict_t *this, char *key, data_t *value, void *data)
-{
- dict_t *dst = NULL;
- int64_t *ptr = 0, *size = NULL;
- int32_t ret = -1;
-
- dst = data;
-
- if (strcmp(key, QUOTA_SIZE_KEY) == 0) {
- ret = dict_get_bin(dst, key, (void **)&size);
- if (ret < 0) {
- size = GF_CALLOC(1, sizeof(int64_t), gf_common_mt_char);
- if (size == NULL) {
- gf_log("stripe", GF_LOG_WARNING, "memory allocation failed");
- goto out;
- }
- ret = dict_set_bin(dst, key, size, sizeof(int64_t));
- if (ret < 0) {
- gf_log("stripe", GF_LOG_WARNING,
- "stripe aggregate dict set failed");
- GF_FREE(size);
- goto out;
- }
- }
-
- ptr = data_to_bin(value);
- if (ptr == NULL) {
- gf_log("stripe", GF_LOG_WARNING, "data to bin failed");
- goto out;
- }
-
- *size = hton64(ntoh64(*size) + ntoh64(*ptr));
- } else if (strcmp(key, GF_CONTENT_KEY)) {
- /* No need to aggregate 'CONTENT' data */
- ret = dict_set(dst, key, value);
- if (ret)
- gf_log("stripe", GF_LOG_WARNING, "xattr dict set failed");
- }
-
-out:
- return 0;
-}
-
-void
-stripe_aggregate_xattr(dict_t *dst, dict_t *src)
-{
- if ((dst == NULL) || (src == NULL)) {
- goto out;
- }
-
- dict_foreach(src, stripe_aggregate, dst);
-out:
- return;
-}
-
-int32_t
-stripe_xattr_aggregate(char *buffer, stripe_local_t *local, int32_t *total)
-{
- int32_t i = 0;
- int32_t ret = -1;
- int32_t len = 0;
- char *sbuf = NULL;
- stripe_xattr_sort_t *xattr = NULL;
-
- if (!buffer || !local || !local->xattr_list)
- goto out;
-
- sbuf = buffer;
-
- for (i = 0; i < local->nallocs; i++) {
- xattr = local->xattr_list + i;
- len = xattr->xattr_len - 1; /* length includes \0 */
-
- if (len && xattr && xattr->xattr_value) {
- memcpy(buffer, xattr->xattr_value, len);
- buffer += len;
- *buffer++ = ' ';
- }
- }
-
- *--buffer = '\0';
- if (total)
- *total = buffer - sbuf;
- ret = 0;
-
-out:
- return ret;
-}
-
-int32_t
-stripe_free_xattr_str(stripe_local_t *local)
-{
- int32_t i = 0;
- int32_t ret = -1;
- stripe_xattr_sort_t *xattr = NULL;
-
- if (!local || !local->xattr_list)
- goto out;
-
- for (i = 0; i < local->nallocs; i++) {
- xattr = local->xattr_list + i;
-
- if (xattr && xattr->xattr_value)
- GF_FREE(xattr->xattr_value);
- }
-
- ret = 0;
-out:
- return ret;
-}
-
-int32_t
-stripe_fill_lockinfo_xattr(xlator_t *this, stripe_local_t *local,
- void **xattr_serz)
-{
- int32_t ret = -1, i = 0, len = 0;
- dict_t *tmp1 = NULL, *tmp2 = NULL;
- char *buf = NULL;
- stripe_xattr_sort_t *xattr = NULL;
-
- if (xattr_serz == NULL) {
- goto out;
- }
-
- tmp2 = dict_new();
-
- if (tmp2 == NULL) {
- goto out;
- }
-
- for (i = 0; i < local->nallocs; i++) {
- xattr = local->xattr_list + i;
- len = xattr->xattr_len;
-
- if (len && xattr && xattr->xattr_value) {
- ret = dict_reset(tmp2);
- if (ret < 0) {
- gf_log(this->name, GF_LOG_DEBUG, "dict_reset failed (%s)",
- strerror(-ret));
- }
-
- ret = dict_unserialize(xattr->xattr_value, xattr->xattr_len, &tmp2);
- if (ret < 0) {
- gf_log(this->name, GF_LOG_WARNING,
- "dict_unserialize failed (%s)", strerror(-ret));
- ret = -1;
- goto out;
- }
-
- tmp1 = dict_copy(tmp2, tmp1);
- if (tmp1 == NULL) {
- gf_log(this->name, GF_LOG_WARNING, "dict_copy failed (%s)",
- strerror(-ret));
- ret = -1;
- goto out;
- }
- }
- }
-
- len = dict_serialized_length(tmp1);
- if (len > 0) {
- buf = GF_CALLOC(1, len, gf_common_mt_dict_t);
- if (buf == NULL) {
- ret = -1;
- goto out;
- }
-
- ret = dict_serialize(tmp1, buf);
- if (ret < 0) {
- gf_log(this->name, GF_LOG_WARNING, "dict_serialize failed (%s)",
- strerror(-ret));
- GF_FREE(buf);
- ret = -1;
- goto out;
- }
-
- *xattr_serz = buf;
- }
-
- ret = 0;
-out:
- if (tmp1 != NULL) {
- dict_unref(tmp1);
- }
-
- if (tmp2 != NULL) {
- dict_unref(tmp2);
- }
-
- return ret;
-}
-
-int32_t
-stripe_fill_pathinfo_xattr(xlator_t *this, stripe_local_t *local,
- char **xattr_serz)
-{
- int ret = -1;
- int32_t padding = 0;
- int32_t tlen = 0;
- int len = 0;
- char stripe_size_str[20] = {
- 0,
- };
- char *pathinfo_serz = NULL;
-
- if (!local) {
- gf_log(this->name, GF_LOG_ERROR, "Possible NULL deref");
- goto out;
- }
-
- len = snprintf(stripe_size_str, sizeof(stripe_size_str), "%" PRId64,
- local->fctx ? local->fctx->stripe_size : 0);
- if (len < 0 || len >= sizeof(stripe_size_str))
- goto out;
- /* extra bytes for decorations (brackets and <>'s) */
- padding = strlen(this->name) + SLEN(STRIPE_PATHINFO_HEADER) + len + 7;
- local->xattr_total_len += (padding + 2);
-
- pathinfo_serz = GF_MALLOC(local->xattr_total_len, gf_common_mt_char);
- if (!pathinfo_serz)
- goto out;
-
- /* xlator info */
- (void)sprintf(pathinfo_serz, "(<" STRIPE_PATHINFO_HEADER "%s:[%s]> ",
- this->name, stripe_size_str);
-
- ret = stripe_xattr_aggregate(pathinfo_serz + padding, local, &tlen);
- if (ret) {
- gf_log(this->name, GF_LOG_ERROR, "Cannot aggregate pathinfo list");
- GF_FREE(pathinfo_serz);
- goto out;
- }
-
- *(pathinfo_serz + padding + tlen) = ')';
- *(pathinfo_serz + padding + tlen + 1) = '\0';
-
- *xattr_serz = pathinfo_serz;
-
- ret = 0;
-out:
- return ret;
-}
-
-/**
- * stripe_get_matching_bs - Get the matching block size for the given path.
- */
-int32_t
-stripe_get_matching_bs(const char *path, stripe_private_t *priv)
-{
- struct stripe_options *trav = NULL;
- uint64_t block_size = 0;
-
- GF_VALIDATE_OR_GOTO("stripe", priv, out);
- GF_VALIDATE_OR_GOTO("stripe", path, out);
-
- LOCK(&priv->lock);
- {
- block_size = priv->block_size;
- trav = priv->pattern;
- while (trav) {
- if (!fnmatch(trav->path_pattern, path, FNM_NOESCAPE)) {
- block_size = trav->block_size;
- break;
- }
- trav = trav->next;
- }
- }
- UNLOCK(&priv->lock);
-
-out:
- return block_size;
-}
-
-int32_t
-stripe_ctx_handle(xlator_t *this, call_frame_t *prev, stripe_local_t *local,
- dict_t *dict)
-{
- char key[256] = {
- 0,
- };
- data_t *data = NULL;
- int32_t index = 0;
- stripe_private_t *priv = NULL;
-
- priv = this->private;
-
- if (!local->fctx) {
- local->fctx = GF_CALLOC(1, sizeof(stripe_fd_ctx_t),
- gf_stripe_mt_stripe_fd_ctx_t);
- if (!local->fctx) {
- local->op_errno = ENOMEM;
- local->op_ret = -1;
- goto out;
- }
-
- local->fctx->static_array = 0;
- }
- /* Stripe block size */
- sprintf(key, "trusted.%s.stripe-size", this->name);
- data = dict_get(dict, key);
- if (!data) {
- local->xattr_self_heal_needed = 1;
- gf_log(this->name, GF_LOG_ERROR, "Failed to get stripe-size");
- goto out;
- } else {
- if (!local->fctx->stripe_size) {
- local->fctx->stripe_size = data_to_int64(data);
- }
-
- if (local->fctx->stripe_size != data_to_int64(data)) {
- gf_log(this->name, GF_LOG_WARNING,
- "stripe-size mismatch in blocks");
- local->xattr_self_heal_needed = 1;
- }
- }
-
- /* Stripe count */
- sprintf(key, "trusted.%s.stripe-count", this->name);
- data = dict_get(dict, key);
-
- if (!data) {
- local->xattr_self_heal_needed = 1;
- gf_log(this->name, GF_LOG_ERROR, "Failed to get stripe-count");
- goto out;
- }
- if (!local->fctx->xl_array) {
- local->fctx->stripe_count = data_to_int32(data);
- if (!local->fctx->stripe_count) {
- gf_log(this->name, GF_LOG_ERROR, "error with stripe-count xattr");
- local->op_ret = -1;
- local->op_errno = EIO;
- goto out;
- }
-
- local->fctx->xl_array = GF_CALLOC(local->fctx->stripe_count,
- sizeof(xlator_t *),
- gf_stripe_mt_xlator_t);
-
- if (!local->fctx->xl_array) {
- local->op_errno = ENOMEM;
- local->op_ret = -1;
- goto out;
- }
- }
- if (local->fctx->stripe_count != data_to_int32(data)) {
- gf_log(this->name, GF_LOG_ERROR,
- "error with stripe-count xattr (%d != %d)",
- local->fctx->stripe_count, data_to_int32(data));
- local->op_ret = -1;
- local->op_errno = EIO;
- goto out;
- }
-
- /* index */
- sprintf(key, "trusted.%s.stripe-index", this->name);
- data = dict_get(dict, key);
- if (!data) {
- local->xattr_self_heal_needed = 1;
- gf_log(this->name, GF_LOG_ERROR, "Failed to get stripe-index");
- goto out;
- }
- index = data_to_int32(data);
- if (index > priv->child_count) {
- gf_log(this->name, GF_LOG_ERROR, "error with stripe-index xattr (%d)",
- index);
- local->op_ret = -1;
- local->op_errno = EIO;
- goto out;
- }
- if (local->fctx->xl_array) {
- if (!local->fctx->xl_array[index])
- local->fctx->xl_array[index] = prev->this;
- }
-
- sprintf(key, "trusted.%s.stripe-coalesce", this->name);
- data = dict_get(dict, key);
- if (!data) {
- /*
- * The file was probably created prior to coalesce support.
- * Assume non-coalesce mode for this file to maintain backwards
- * compatibility.
- */
- gf_log(this->name, GF_LOG_DEBUG,
- "missing stripe-coalesce "
- "attr, assume non-coalesce mode");
- local->fctx->stripe_coalesce = 0;
- } else {
- local->fctx->stripe_coalesce = data_to_int32(data);
- }
-
-out:
- return 0;
-}
-
-int32_t
-stripe_xattr_request_build(xlator_t *this, dict_t *dict, uint64_t stripe_size,
- uint32_t stripe_count, uint32_t stripe_index,
- uint32_t stripe_coalesce)
-{
- char key[256] = {
- 0,
- };
- int32_t ret = -1;
-
- sprintf(key, "trusted.%s.stripe-size", this->name);
- ret = dict_set_int64(dict, key, stripe_size);
- if (ret) {
- gf_log(this->name, GF_LOG_WARNING, "failed to set %s in xattr_req dict",
- key);
- goto out;
- }
-
- sprintf(key, "trusted.%s.stripe-count", this->name);
- ret = dict_set_int32(dict, key, stripe_count);
- if (ret) {
- gf_log(this->name, GF_LOG_WARNING, "failed to set %s in xattr_req dict",
- key);
- goto out;
- }
-
- sprintf(key, "trusted.%s.stripe-index", this->name);
- ret = dict_set_int32(dict, key, stripe_index);
- if (ret) {
- gf_log(this->name, GF_LOG_WARNING, "failed to set %s in xattr_req dict",
- key);
- goto out;
- }
-
- sprintf(key, "trusted.%s.stripe-coalesce", this->name);
- ret = dict_set_int32(dict, key, stripe_coalesce);
- if (ret) {
- gf_log(this->name, GF_LOG_WARNING, "failed to set %s in xattr_req_dict",
- key);
- goto out;
- }
-out:
- return ret;
-}
-
-static int
-set_default_block_size(stripe_private_t *priv, char *num)
-{
- int ret = -1;
- GF_VALIDATE_OR_GOTO("stripe", THIS, out);
- GF_VALIDATE_OR_GOTO(THIS->name, priv, out);
- GF_VALIDATE_OR_GOTO(THIS->name, num, out);
-
- if (gf_string2bytesize_uint64(num, &priv->block_size) != 0) {
- gf_log(THIS->name, GF_LOG_ERROR, "invalid number format \"%s\"", num);
- goto out;
- }
-
- ret = 0;
-
-out:
- return ret;
-}
-
-int
-set_stripe_block_size(xlator_t *this, stripe_private_t *priv, char *data)
-{
- int ret = -1;
- char *tmp_str = NULL;
- char *tmp_str1 = NULL;
- char *dup_str = NULL;
- char *stripe_str = NULL;
- char *pattern = NULL;
- char *num = NULL;
- struct stripe_options *temp_stripeopt = NULL;
- struct stripe_options *stripe_opt = NULL;
-
- if (!this || !priv || !data)
- goto out;
-
- /* Get the pattern for striping.
- "option block-size *avi:10MB" etc */
- stripe_str = strtok_r(data, ",", &tmp_str);
- while (stripe_str) {
- dup_str = gf_strdup(stripe_str);
- stripe_opt = GF_CALLOC(1, sizeof(struct stripe_options),
- gf_stripe_mt_stripe_options);
- if (!stripe_opt) {
- goto out;
- }
-
- pattern = strtok_r(dup_str, ":", &tmp_str1);
- num = strtok_r(NULL, ":", &tmp_str1);
- if (!num) {
- num = pattern;
- pattern = "*";
- ret = set_default_block_size(priv, num);
- if (ret)
- goto out;
- }
- if (gf_string2bytesize_uint64(num, &stripe_opt->block_size) != 0) {
- gf_log(this->name, GF_LOG_ERROR, "invalid number format \"%s\"",
- num);
- goto out;
- }
-
- if (stripe_opt->block_size < STRIPE_MIN_BLOCK_SIZE) {
- gf_log(this->name, GF_LOG_ERROR,
- "Invalid Block-size: "
- "%s. Should be at least %llu bytes",
- num, STRIPE_MIN_BLOCK_SIZE);
- goto out;
- }
- if (stripe_opt->block_size % 512) {
- gf_log(this->name, GF_LOG_ERROR,
- "Block-size: %s should"
- " be a multiple of 512 bytes",
- num);
- goto out;
- }
-
- memcpy(stripe_opt->path_pattern, pattern, strlen(pattern));
-
- gf_log(this->name, GF_LOG_DEBUG,
- "block-size : pattern %s : size %" PRId64,
- stripe_opt->path_pattern, stripe_opt->block_size);
-
- if (priv->pattern)
- temp_stripeopt = NULL;
- else
- temp_stripeopt = priv->pattern;
-
- stripe_opt->next = temp_stripeopt;
-
- priv->pattern = stripe_opt;
- stripe_opt = NULL;
-
- GF_FREE(dup_str);
- dup_str = NULL;
-
- stripe_str = strtok_r(NULL, ",", &tmp_str);
- }
-
- ret = 0;
-out:
-
- GF_FREE(dup_str);
-
- GF_FREE(stripe_opt);
-
- return ret;
-}
-
-int32_t
-stripe_iatt_merge(struct iatt *from, struct iatt *to)
-{
- if (to->ia_size < from->ia_size)
- to->ia_size = from->ia_size;
- if (to->ia_mtime < from->ia_mtime)
- to->ia_mtime = from->ia_mtime;
- if (to->ia_ctime < from->ia_ctime)
- to->ia_ctime = from->ia_ctime;
- if (to->ia_atime < from->ia_atime)
- to->ia_atime = from->ia_atime;
- return 0;
-}
-
-off_t
-coalesced_offset(off_t offset, uint64_t stripe_size, int stripe_count)
-{
- size_t line_size = 0;
- uint64_t stripe_num = 0;
- off_t coalesced_offset = 0;
-
- line_size = stripe_size * stripe_count;
- stripe_num = offset / line_size;
-
- coalesced_offset = (stripe_num * stripe_size) + (offset % stripe_size);
-
- return coalesced_offset;
-}
-
-off_t
-uncoalesced_size(off_t size, uint64_t stripe_size, int stripe_count,
- int stripe_index)
-{
- uint64_t nr_full_stripe_chunks = 0, mod = 0;
-
- if (!size)
- return size;
-
- /*
- * Estimate the number of fully written stripes from the
- * local file size. Each stripe_size chunk corresponds to
- * a stripe.
- */
- nr_full_stripe_chunks = (size / stripe_size) * stripe_count;
- mod = size % stripe_size;
-
- if (!mod) {
- /*
- * There is no remainder, thus we could have overestimated
- * the size of the file in terms of chunks. Trim the number
- * of chunks by the following stripe members and leave it
- * up to those nodes to respond with a larger size (if
- * necessary).
- */
- nr_full_stripe_chunks -= stripe_count - (stripe_index + 1);
- size = nr_full_stripe_chunks * stripe_size;
- } else {
- /*
- * There is a remainder and thus we own the last chunk of the
- * file. Add the preceding stripe members of the final stripe
- * along with the remainder to calculate the exact size.
- */
- nr_full_stripe_chunks += stripe_index;
- size = nr_full_stripe_chunks * stripe_size + mod;
- }
-
- return size;
-}
diff --git a/xlators/cluster/stripe/src/stripe-mem-types.h b/xlators/cluster/stripe/src/stripe-mem-types.h
deleted file mode 100644
index dcbef31212b..00000000000
--- a/xlators/cluster/stripe/src/stripe-mem-types.h
+++ /dev/null
@@ -1,29 +0,0 @@
-/*
- Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-#ifndef __STRIPE_MEM_TYPES_H__
-#define __STRIPE_MEM_TYPES_H__
-
-#include "mem-types.h"
-
-enum gf_stripe_mem_types_ {
- gf_stripe_mt_iovec = gf_common_mt_end + 1,
- gf_stripe_mt_stripe_replies,
- gf_stripe_mt_stripe_fd_ctx_t,
- gf_stripe_mt_char,
- gf_stripe_mt_int8_t,
- gf_stripe_mt_int32_t,
- gf_stripe_mt_xlator_t,
- gf_stripe_mt_stripe_private_t,
- gf_stripe_mt_stripe_options,
- gf_stripe_mt_xattr_sort_t,
- gf_stripe_mt_end
-};
-#endif
diff --git a/xlators/cluster/stripe/src/stripe.c b/xlators/cluster/stripe/src/stripe.c
deleted file mode 100644
index 6010c1ed6c3..00000000000
--- a/xlators/cluster/stripe/src/stripe.c
+++ /dev/null
@@ -1,5612 +0,0 @@
-/*
- Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-/**
- * xlators/cluster/stripe:
- * Stripe translator, stripes the data across its child nodes,
- * as per the options given in the volfile. The striping works
- * fairly simple. It writes files at different offset as per
- * calculation. So, 'ls -l' output at the real posix level will
- * show file size bigger than the actual size. But when one does
- * 'df' or 'du <file>', real size of the file on the server is shown.
- *
- * WARNING:
- * Stripe translator can't regenerate data if a child node gets disconnected.
- * So, no 'self-heal' for stripe. Hence the advice, use stripe only when its
- * very much necessary, or else, use it in combination with AFR, to have a
- * backup copy.
- */
-#include <fnmatch.h>
-#include "stripe.h"
-#include "libxlator.h"
-#include "byte-order.h"
-#include "statedump.h"
-
-struct volume_options options[];
-
-int32_t
-stripe_sh_chown_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *preop,
- struct iatt *postop, dict_t *xdata)
-{
- int callcnt = -1;
- stripe_local_t *local = NULL;
-
- if (!this || !frame || !frame->local) {
- gf_log("stripe", GF_LOG_DEBUG, "possible NULL deref");
- goto out;
- }
-
- local = frame->local;
-
- LOCK(&frame->lock);
- {
- callcnt = --local->call_count;
- }
- UNLOCK(&frame->lock);
-
- if (!callcnt) {
- STRIPE_STACK_DESTROY(frame);
- }
-out:
- return 0;
-}
-
-int32_t
-stripe_sh_make_entry_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
-{
- stripe_local_t *local = NULL;
- call_frame_t *prev = NULL;
-
- if (!frame || !frame->local || !cookie || !this) {
- gf_log("stripe", GF_LOG_DEBUG, "possible NULL deref");
- goto out;
- }
-
- prev = cookie;
- local = frame->local;
-
- STACK_WIND(frame, stripe_sh_chown_cbk, prev->this,
- prev->this->fops->setattr, &local->loc, &local->stbuf,
- (GF_SET_ATTR_UID | GF_SET_ATTR_GID), NULL);
-
-out:
- return 0;
-}
-
-int32_t
-stripe_entry_self_heal(call_frame_t *frame, xlator_t *this,
- stripe_local_t *local)
-{
- xlator_list_t *trav = NULL;
- call_frame_t *rframe = NULL;
- stripe_local_t *rlocal = NULL;
- stripe_private_t *priv = NULL;
- dict_t *xdata = NULL;
- int ret = 0;
-
- if (!local || !this || !frame) {
- gf_log("stripe", GF_LOG_DEBUG, "possible NULL deref");
- goto out;
- }
-
- if (!(IA_ISREG(local->stbuf.ia_type) || IA_ISDIR(local->stbuf.ia_type)))
- return 0;
-
- priv = this->private;
- trav = this->children;
- rframe = copy_frame(frame);
- if (!rframe) {
- goto out;
- }
- rlocal = mem_get0(this->local_pool);
- if (!rlocal) {
- goto out;
- }
- rframe->local = rlocal;
- rlocal->call_count = priv->child_count;
- loc_copy(&rlocal->loc, &local->loc);
- memcpy(&rlocal->stbuf, &local->stbuf, sizeof(struct iatt));
-
- xdata = dict_new();
- if (!xdata)
- goto out;
-
- ret = dict_set_gfuuid(xdata, "gfid-req", local->stbuf.ia_gfid, true);
- if (ret)
- gf_log(this->name, GF_LOG_WARNING, "%s: failed to set gfid-req",
- local->loc.path);
-
- while (trav) {
- if (IA_ISREG(local->stbuf.ia_type)) {
- STACK_WIND(
- rframe, stripe_sh_make_entry_cbk, trav->xlator,
- trav->xlator->fops->mknod, &local->loc,
- st_mode_from_ia(local->stbuf.ia_prot, local->stbuf.ia_type), 0,
- 0, xdata);
- }
- if (IA_ISDIR(local->stbuf.ia_type)) {
- STACK_WIND(
- rframe, stripe_sh_make_entry_cbk, trav->xlator,
- trav->xlator->fops->mkdir, &local->loc,
- st_mode_from_ia(local->stbuf.ia_prot, local->stbuf.ia_type), 0,
- xdata);
- }
- trav = trav->next;
- }
-
- if (xdata)
- dict_unref(xdata);
- return 0;
-
-out:
- if (rframe)
- STRIPE_STACK_DESTROY(rframe);
-
- return 0;
-}
-
-int32_t
-stripe_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct iatt *buf, dict_t *xdata, struct iatt *postparent)
-{
- int32_t callcnt = 0;
- stripe_local_t *local = NULL;
- call_frame_t *prev = NULL;
- int ret = 0;
-
- if (!this || !frame || !frame->local || !cookie) {
- gf_log("stripe", GF_LOG_DEBUG, "possible NULL deref");
- goto out;
- }
-
- prev = cookie;
- local = frame->local;
-
- LOCK(&frame->lock);
- {
- callcnt = --local->call_count;
-
- if (op_ret == -1) {
- if ((op_errno != ENOENT) && (op_errno != ESTALE))
- gf_log(this->name, GF_LOG_DEBUG, "%s returned error %s",
- prev->this->name, strerror(op_errno));
- if (local->op_errno != ESTALE)
- local->op_errno = op_errno;
- if (((op_errno != ENOENT) && (op_errno != ENOTCONN) &&
- (op_errno != ESTALE)) ||
- (prev->this == FIRST_CHILD(this)))
- local->failed = 1;
- if (op_errno == ENOENT)
- local->entry_self_heal_needed = 1;
- }
-
- if (op_ret >= 0) {
- local->op_ret = 0;
- if (IA_ISREG(buf->ia_type)) {
- ret = stripe_ctx_handle(this, prev, local, xdata);
- if (ret)
- gf_log(this->name, GF_LOG_ERROR,
- "Error getting fctx info from"
- " dict");
- }
-
- if (FIRST_CHILD(this) == prev->this) {
- local->stbuf = *buf;
- local->postparent = *postparent;
- local->inode = inode_ref(inode);
- if (xdata)
- local->xdata = dict_ref(xdata);
- if (local->xattr) {
- stripe_aggregate_xattr(local->xdata, local->xattr);
- dict_unref(local->xattr);
- local->xattr = NULL;
- }
- }
-
- if (!local->xdata && !local->xattr) {
- local->xattr = dict_ref(xdata);
- } else if (local->xdata) {
- stripe_aggregate_xattr(local->xdata, xdata);
- } else if (local->xattr) {
- stripe_aggregate_xattr(local->xattr, xdata);
- }
-
- local->stbuf_blocks += buf->ia_blocks;
- local->postparent_blocks += postparent->ia_blocks;
-
- correct_file_size(buf, local->fctx, prev);
-
- if (local->stbuf_size < buf->ia_size)
- local->stbuf_size = buf->ia_size;
- if (local->postparent_size < postparent->ia_size)
- local->postparent_size = postparent->ia_size;
-
- if (gf_uuid_is_null(local->ia_gfid))
- gf_uuid_copy(local->ia_gfid, buf->ia_gfid);
-
- /* Make sure the gfid on all the nodes are same */
- if (gf_uuid_compare(local->ia_gfid, buf->ia_gfid)) {
- gf_log(this->name, GF_LOG_WARNING,
- "%s: gfid different on subvolume %s", local->loc.path,
- prev->this->name);
- }
- }
- }
- UNLOCK(&frame->lock);
-
- if (!callcnt) {
- if (local->op_ret == 0 && local->entry_self_heal_needed &&
- !gf_uuid_is_null(local->loc.inode->gfid))
- stripe_entry_self_heal(frame, this, local);
-
- if (local->failed)
- local->op_ret = -1;
-
- if (local->op_ret != -1) {
- local->stbuf.ia_blocks = local->stbuf_blocks;
- local->stbuf.ia_size = local->stbuf_size;
- local->postparent.ia_blocks = local->postparent_blocks;
- local->postparent.ia_size = local->postparent_size;
- inode_ctx_put(local->inode, this, (uint64_t)(long)local->fctx);
- }
-
- STRIPE_STACK_UNWIND(lookup, frame, local->op_ret, local->op_errno,
- local->inode, &local->stbuf, local->xdata,
- &local->postparent);
- }
-out:
- return 0;
-}
-
-int32_t
-stripe_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
-{
- stripe_local_t *local = NULL;
- xlator_list_t *trav = NULL;
- stripe_private_t *priv = NULL;
- int32_t op_errno = EINVAL;
- int64_t filesize = 0;
- int ret = 0;
- uint64_t tmpctx = 0;
-
- VALIDATE_OR_GOTO(frame, err);
- VALIDATE_OR_GOTO(this, err);
- VALIDATE_OR_GOTO(loc, err);
- VALIDATE_OR_GOTO(loc->inode, err);
-
- priv = this->private;
- trav = this->children;
-
- /* Initialization */
- local = mem_get0(this->local_pool);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
- local->op_ret = -1;
- frame->local = local;
- loc_copy(&local->loc, loc);
-
- inode_ctx_get(local->inode, this, &tmpctx);
- if (tmpctx)
- local->fctx = (stripe_fd_ctx_t *)(long)tmpctx;
-
- /* quick-read friendly changes */
- if (xdata && dict_get(xdata, GF_CONTENT_KEY)) {
- ret = dict_get_int64(xdata, GF_CONTENT_KEY, &filesize);
- if (!ret && (filesize > priv->block_size))
- dict_del(xdata, GF_CONTENT_KEY);
- }
-
- /* get stripe-size xattr on lookup. This would be required for
- * open/read/write/pathinfo calls. Hence we send down the request
- * even when type == IA_INVAL */
-
- /*
- * We aren't guaranteed to have xdata here. We need the format info for
- * the file, so allocate xdata if necessary.
- */
- if (!xdata)
- xdata = dict_new();
- else
- xdata = dict_ref(xdata);
-
- if (xdata &&
- (IA_ISREG(loc->inode->ia_type) || (loc->inode->ia_type == IA_INVAL))) {
- ret = stripe_xattr_request_build(this, xdata, 8, 4, 4, 0);
- if (ret)
- gf_log(this->name, GF_LOG_ERROR,
- "Failed to build"
- " xattr request for %s",
- loc->path);
- }
-
- /* Every time in stripe lookup, all child nodes
- should be looked up */
- local->call_count = priv->child_count;
- while (trav) {
- STACK_WIND(frame, stripe_lookup_cbk, trav->xlator,
- trav->xlator->fops->lookup, loc, xdata);
- trav = trav->next;
- }
-
- dict_unref(xdata);
-
- return 0;
-err:
- STRIPE_STACK_UNWIND(lookup, frame, -1, op_errno, NULL, NULL, NULL, NULL);
- return 0;
-}
-
-int32_t
-stripe_stat_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *buf,
- dict_t *xdata)
-{
- int32_t callcnt = 0;
- stripe_local_t *local = NULL;
- call_frame_t *prev = NULL;
-
- if (!this || !frame || !frame->local || !cookie) {
- gf_log("stripe", GF_LOG_DEBUG, "possible NULL deref");
- goto out;
- }
- prev = cookie;
- local = frame->local;
-
- LOCK(&frame->lock);
- {
- callcnt = --local->call_count;
-
- if (op_ret == -1) {
- gf_log(this->name, GF_LOG_DEBUG, "%s returned error %s",
- prev->this->name, strerror(op_errno));
- local->op_errno = op_errno;
- if ((op_errno != ENOENT) || (prev->this == FIRST_CHILD(this)))
- local->failed = 1;
- }
-
- if (op_ret == 0) {
- local->op_ret = 0;
-
- if (FIRST_CHILD(this) == prev->this) {
- local->stbuf = *buf;
- }
-
- local->stbuf_blocks += buf->ia_blocks;
-
- correct_file_size(buf, local->fctx, prev);
-
- if (local->stbuf_size < buf->ia_size)
- local->stbuf_size = buf->ia_size;
- }
- }
- UNLOCK(&frame->lock);
-
- if (!callcnt) {
- if (local->failed)
- local->op_ret = -1;
-
- if (local->op_ret != -1) {
- local->stbuf.ia_size = local->stbuf_size;
- local->stbuf.ia_blocks = local->stbuf_blocks;
- }
-
- STRIPE_STACK_UNWIND(stat, frame, local->op_ret, local->op_errno,
- &local->stbuf, NULL);
- }
-out:
- return 0;
-}
-
-int32_t
-stripe_stat(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
-{
- xlator_list_t *trav = NULL;
- stripe_local_t *local = NULL;
- stripe_private_t *priv = NULL;
- stripe_fd_ctx_t *fctx = NULL;
- int32_t op_errno = EINVAL;
-
- VALIDATE_OR_GOTO(frame, err);
- VALIDATE_OR_GOTO(this, err);
- VALIDATE_OR_GOTO(loc, err);
- VALIDATE_OR_GOTO(loc->path, err);
- VALIDATE_OR_GOTO(loc->inode, err);
-
- priv = this->private;
- trav = this->children;
-
- if (priv->first_child_down) {
- op_errno = ENOTCONN;
- goto err;
- }
-
- /* Initialization */
- local = mem_get0(this->local_pool);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
- local->op_ret = -1;
- frame->local = local;
- local->call_count = priv->child_count;
-
- if (IA_ISREG(loc->inode->ia_type)) {
- inode_ctx_get(loc->inode, this, (uint64_t *)&fctx);
- if (!fctx)
- goto err;
- local->fctx = fctx;
- }
-
- while (trav) {
- STACK_WIND(frame, stripe_stat_cbk, trav->xlator,
- trav->xlator->fops->stat, loc, NULL);
- trav = trav->next;
- }
-
- return 0;
-
-err:
- STRIPE_STACK_UNWIND(stat, frame, -1, op_errno, NULL, NULL);
- return 0;
-}
-
-int32_t
-stripe_statfs_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct statvfs *stbuf,
- dict_t *xdata)
-{
- stripe_local_t *local = NULL;
- int32_t callcnt = 0;
-
- if (!this || !frame || !frame->local) {
- gf_log("stripe", GF_LOG_DEBUG, "possible NULL deref");
- goto out;
- }
- local = frame->local;
-
- LOCK(&frame->lock);
- {
- callcnt = --local->call_count;
-
- if (op_ret && (op_errno != ENOTCONN)) {
- local->op_errno = op_errno;
- }
- if (op_ret == 0) {
- struct statvfs *dict_buf = &local->statvfs_buf;
- dict_buf->f_bsize = stbuf->f_bsize;
- dict_buf->f_frsize = stbuf->f_frsize;
- dict_buf->f_blocks += stbuf->f_blocks;
- dict_buf->f_bfree += stbuf->f_bfree;
- dict_buf->f_bavail += stbuf->f_bavail;
- dict_buf->f_files += stbuf->f_files;
- dict_buf->f_ffree += stbuf->f_ffree;
- dict_buf->f_favail += stbuf->f_favail;
- dict_buf->f_fsid = stbuf->f_fsid;
- dict_buf->f_flag = stbuf->f_flag;
- dict_buf->f_namemax = stbuf->f_namemax;
- local->op_ret = 0;
- }
- }
- UNLOCK(&frame->lock);
-
- if (!callcnt) {
- STRIPE_STACK_UNWIND(statfs, frame, local->op_ret, local->op_errno,
- &local->statvfs_buf, NULL);
- }
-out:
- return 0;
-}
-
-int32_t
-stripe_statfs(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
-{
- stripe_local_t *local = NULL;
- xlator_list_t *trav = NULL;
- stripe_private_t *priv = NULL;
- int32_t op_errno = EINVAL;
-
- VALIDATE_OR_GOTO(frame, err);
- VALIDATE_OR_GOTO(this, err);
- VALIDATE_OR_GOTO(loc, err);
-
- trav = this->children;
- priv = this->private;
-
- /* Initialization */
- local = mem_get0(this->local_pool);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
- local->op_ret = -1;
- local->op_errno = ENOTCONN;
- frame->local = local;
-
- local->call_count = priv->child_count;
- while (trav) {
- STACK_WIND(frame, stripe_statfs_cbk, trav->xlator,
- trav->xlator->fops->statfs, loc, NULL);
- trav = trav->next;
- }
-
- return 0;
-err:
- STRIPE_STACK_UNWIND(statfs, frame, -1, op_errno, NULL, NULL);
- return 0;
-}
-
-int32_t
-stripe_truncate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf, dict_t *xdata)
-{
- int32_t callcnt = 0;
- stripe_local_t *local = NULL;
- call_frame_t *prev = NULL;
-
- if (!this || !frame || !frame->local || !cookie) {
- gf_log("stripe", GF_LOG_DEBUG, "possible NULL deref");
- goto out;
- }
-
- prev = cookie;
- local = frame->local;
-
- LOCK(&frame->lock);
- {
- callcnt = --local->call_count;
-
- if (op_ret == -1) {
- gf_log(this->name, GF_LOG_DEBUG, "%s returned error %s",
- prev->this->name, strerror(op_errno));
- local->op_errno = op_errno;
- if ((op_errno != ENOENT) || (prev->this == FIRST_CHILD(this)))
- local->failed = 1;
- }
-
- if (op_ret == 0) {
- local->op_ret = 0;
- if (FIRST_CHILD(this) == prev->this) {
- local->pre_buf = *prebuf;
- local->post_buf = *postbuf;
- }
-
- local->prebuf_blocks += prebuf->ia_blocks;
- local->postbuf_blocks += postbuf->ia_blocks;
-
- correct_file_size(prebuf, local->fctx, prev);
- correct_file_size(postbuf, local->fctx, prev);
-
- if (local->prebuf_size < prebuf->ia_size)
- local->prebuf_size = prebuf->ia_size;
-
- if (local->postbuf_size < postbuf->ia_size)
- local->postbuf_size = postbuf->ia_size;
- }
- }
- UNLOCK(&frame->lock);
-
- if (!callcnt) {
- if (local->failed)
- local->op_ret = -1;
-
- if (local->op_ret != -1) {
- local->pre_buf.ia_blocks = local->prebuf_blocks;
- local->pre_buf.ia_size = local->prebuf_size;
- local->post_buf.ia_blocks = local->postbuf_blocks;
- local->post_buf.ia_size = local->postbuf_size;
- }
-
- STRIPE_STACK_UNWIND(truncate, frame, local->op_ret, local->op_errno,
- &local->pre_buf, &local->post_buf, NULL);
- }
-out:
- return 0;
-}
-
-int32_t
-stripe_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset,
- dict_t *xdata)
-{
- stripe_local_t *local = NULL;
- stripe_private_t *priv = NULL;
- stripe_fd_ctx_t *fctx = NULL;
- int32_t op_errno = EINVAL;
- int i, eof_idx;
- off_t dest_offset, tmp_offset;
-
- VALIDATE_OR_GOTO(frame, err);
- VALIDATE_OR_GOTO(this, err);
- VALIDATE_OR_GOTO(loc, err);
- VALIDATE_OR_GOTO(loc->path, err);
- VALIDATE_OR_GOTO(loc->inode, err);
-
- priv = this->private;
-
- if (priv->first_child_down) {
- op_errno = ENOTCONN;
- goto err;
- }
-
- /* Initialization */
- local = mem_get0(this->local_pool);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
- local->op_ret = -1;
- frame->local = local;
- local->call_count = priv->child_count;
-
- inode_ctx_get(loc->inode, this, (uint64_t *)&fctx);
- if (!fctx) {
- gf_log(this->name, GF_LOG_ERROR, "no stripe context");
- op_errno = EINVAL;
- goto err;
- }
-
- local->fctx = fctx;
- eof_idx = (offset / fctx->stripe_size) % fctx->stripe_count;
-
- for (i = 0; i < fctx->stripe_count; i++) {
- if (!fctx->xl_array[i]) {
- gf_log(this->name, GF_LOG_ERROR, "no xlator at index %d", i);
- op_errno = EINVAL;
- goto err;
- }
-
- if (fctx->stripe_coalesce) {
- /*
- * The node that owns EOF is truncated to the exact
- * coalesced offset. Nodes prior to this index should
- * be rounded up to the size of the complete stripe,
- * while nodes after this index should be rounded down
- * to the size of the previous stripe.
- */
- if (i < eof_idx)
- tmp_offset = gf_roof(offset,
- fctx->stripe_size * fctx->stripe_count);
- else if (i > eof_idx)
- tmp_offset = gf_floor(offset,
- fctx->stripe_size * fctx->stripe_count);
- else
- tmp_offset = offset;
-
- dest_offset = coalesced_offset(tmp_offset, fctx->stripe_size,
- fctx->stripe_count);
- } else {
- dest_offset = offset;
- }
-
- STACK_WIND(frame, stripe_truncate_cbk, fctx->xl_array[i],
- fctx->xl_array[i]->fops->truncate, loc, dest_offset, NULL);
- }
-
- return 0;
-err:
- STRIPE_STACK_UNWIND(truncate, frame, -1, op_errno, NULL, NULL, NULL);
- return 0;
-}
-
-int32_t
-stripe_setattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *preop,
- struct iatt *postop, dict_t *xdata)
-{
- int32_t callcnt = 0;
- stripe_local_t *local = NULL;
- call_frame_t *prev = NULL;
-
- if (!this || !frame || !frame->local || !cookie) {
- gf_log("stripe", GF_LOG_DEBUG, "possible NULL deref");
- goto out;
- }
-
- prev = cookie;
- local = frame->local;
-
- LOCK(&frame->lock);
- {
- callcnt = --local->call_count;
-
- if (op_ret == -1) {
- gf_log(this->name, GF_LOG_DEBUG, "%s returned error %s",
- prev->this->name, strerror(op_errno));
- local->op_errno = op_errno;
- if ((op_errno != ENOENT) || (prev->this == FIRST_CHILD(this)))
- local->failed = 1;
- }
-
- if (op_ret == 0) {
- local->op_ret = 0;
-
- if (FIRST_CHILD(this) == prev->this) {
- local->pre_buf = *preop;
- local->post_buf = *postop;
- }
-
- local->prebuf_blocks += preop->ia_blocks;
- local->postbuf_blocks += postop->ia_blocks;
-
- correct_file_size(preop, local->fctx, prev);
- correct_file_size(postop, local->fctx, prev);
-
- if (local->prebuf_size < preop->ia_size)
- local->prebuf_size = preop->ia_size;
- if (local->postbuf_size < postop->ia_size)
- local->postbuf_size = postop->ia_size;
- }
- }
- UNLOCK(&frame->lock);
-
- if (!callcnt) {
- if (local->failed)
- local->op_ret = -1;
-
- if (local->op_ret != -1) {
- local->pre_buf.ia_blocks = local->prebuf_blocks;
- local->pre_buf.ia_size = local->prebuf_size;
- local->post_buf.ia_blocks = local->postbuf_blocks;
- local->post_buf.ia_size = local->postbuf_size;
- }
-
- STRIPE_STACK_UNWIND(setattr, frame, local->op_ret, local->op_errno,
- &local->pre_buf, &local->post_buf, NULL);
- }
-out:
- return 0;
-}
-
-int32_t
-stripe_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
- struct iatt *stbuf, int32_t valid, dict_t *xdata)
-{
- xlator_list_t *trav = NULL;
- stripe_local_t *local = NULL;
- stripe_private_t *priv = NULL;
- stripe_fd_ctx_t *fctx = NULL;
- int32_t op_errno = EINVAL;
-
- VALIDATE_OR_GOTO(frame, err);
- VALIDATE_OR_GOTO(this, err);
- VALIDATE_OR_GOTO(loc, err);
- VALIDATE_OR_GOTO(loc->path, err);
- VALIDATE_OR_GOTO(loc->inode, err);
-
- priv = this->private;
- trav = this->children;
-
- if (priv->first_child_down) {
- op_errno = ENOTCONN;
- goto err;
- }
-
- /* Initialization */
- local = mem_get0(this->local_pool);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
- local->op_ret = -1;
- frame->local = local;
- if (!IA_ISDIR(loc->inode->ia_type) && !IA_ISREG(loc->inode->ia_type)) {
- local->call_count = 1;
- STACK_WIND(frame, stripe_setattr_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->setattr, loc, stbuf, valid, NULL);
- return 0;
- }
-
- if (IA_ISREG(loc->inode->ia_type)) {
- inode_ctx_get(loc->inode, this, (uint64_t *)&fctx);
- if (!fctx)
- goto err;
- local->fctx = fctx;
- }
-
- local->call_count = priv->child_count;
- while (trav) {
- STACK_WIND(frame, stripe_setattr_cbk, trav->xlator,
- trav->xlator->fops->setattr, loc, stbuf, valid, NULL);
- trav = trav->next;
- }
-
- return 0;
-err:
- STRIPE_STACK_UNWIND(setattr, frame, -1, op_errno, NULL, NULL, NULL);
- return 0;
-}
-
-int32_t
-stripe_fsetattr(call_frame_t *frame, xlator_t *this, fd_t *fd,
- struct iatt *stbuf, int32_t valid, dict_t *xdata)
-{
- stripe_local_t *local = NULL;
- stripe_private_t *priv = NULL;
- xlator_list_t *trav = NULL;
- int32_t op_errno = EINVAL;
-
- VALIDATE_OR_GOTO(frame, err);
- VALIDATE_OR_GOTO(this, err);
- VALIDATE_OR_GOTO(fd, err);
- VALIDATE_OR_GOTO(fd->inode, err);
-
- priv = this->private;
- trav = this->children;
-
- /* Initialization */
- local = mem_get0(this->local_pool);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
- local->op_ret = -1;
- frame->local = local;
- local->call_count = priv->child_count;
-
- while (trav) {
- STACK_WIND(frame, stripe_setattr_cbk, trav->xlator,
- trav->xlator->fops->fsetattr, fd, stbuf, valid, NULL);
- trav = trav->next;
- }
-
- return 0;
-err:
- STRIPE_STACK_UNWIND(fsetattr, frame, -1, op_errno, NULL, NULL, NULL);
- return 0;
-}
-
-int32_t
-stripe_stack_rename_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *buf,
- struct iatt *preoldparent, struct iatt *postoldparent,
- struct iatt *prenewparent, struct iatt *postnewparent,
- dict_t *xdata)
-{
- int32_t callcnt = 0;
- stripe_local_t *local = NULL;
- call_frame_t *prev = NULL;
-
- if (!this || !frame || !frame->local || !cookie) {
- gf_log("stripe", GF_LOG_DEBUG, "possible NULL deref");
- goto out;
- }
-
- prev = cookie;
- local = frame->local;
-
- LOCK(&frame->lock);
- {
- callcnt = --local->call_count;
-
- if (op_ret == -1) {
- gf_log(this->name, GF_LOG_DEBUG, "%s returned error %s",
- prev->this->name, strerror(op_errno));
- local->op_errno = op_errno;
- if ((op_errno != ENOENT) || (prev->this == FIRST_CHILD(this)))
- local->failed = 1;
- }
-
- if (op_ret == 0) {
- local->op_ret = 0;
-
- local->stbuf.ia_blocks += buf->ia_blocks;
- local->preparent.ia_blocks += preoldparent->ia_blocks;
- local->postparent.ia_blocks += postoldparent->ia_blocks;
- local->pre_buf.ia_blocks += prenewparent->ia_blocks;
- local->post_buf.ia_blocks += postnewparent->ia_blocks;
-
- correct_file_size(buf, local->fctx, prev);
-
- if (local->stbuf.ia_size < buf->ia_size)
- local->stbuf.ia_size = buf->ia_size;
-
- if (local->preparent.ia_size < preoldparent->ia_size)
- local->preparent.ia_size = preoldparent->ia_size;
-
- if (local->postparent.ia_size < postoldparent->ia_size)
- local->postparent.ia_size = postoldparent->ia_size;
-
- if (local->pre_buf.ia_size < prenewparent->ia_size)
- local->pre_buf.ia_size = prenewparent->ia_size;
-
- if (local->post_buf.ia_size < postnewparent->ia_size)
- local->post_buf.ia_size = postnewparent->ia_size;
- }
- }
- UNLOCK(&frame->lock);
-
- if (!callcnt) {
- if (local->failed)
- local->op_ret = -1;
-
- STRIPE_STACK_UNWIND(rename, frame, local->op_ret, local->op_errno,
- &local->stbuf, &local->preparent,
- &local->postparent, &local->pre_buf,
- &local->post_buf, NULL);
- }
-out:
- return 0;
-}
-
-int32_t
-stripe_first_rename_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *buf,
- struct iatt *preoldparent, struct iatt *postoldparent,
- struct iatt *prenewparent, struct iatt *postnewparent,
- dict_t *xdata)
-{
- stripe_local_t *local = NULL;
- xlator_list_t *trav = NULL;
-
- if (!this || !frame || !frame->local) {
- gf_log("stripe", GF_LOG_DEBUG, "possible NULL deref");
- op_errno = EINVAL;
- goto unwind;
- }
-
- if (op_ret == -1) {
- goto unwind;
- }
-
- local = frame->local;
- trav = this->children;
-
- local->stbuf = *buf;
- local->preparent = *preoldparent;
- local->postparent = *postoldparent;
- local->pre_buf = *prenewparent;
- local->post_buf = *postnewparent;
-
- local->op_ret = 0;
- local->call_count--;
-
- trav = trav->next; /* Skip first child */
- while (trav) {
- STACK_WIND(frame, stripe_stack_rename_cbk, trav->xlator,
- trav->xlator->fops->rename, &local->loc, &local->loc2, NULL);
- trav = trav->next;
- }
- return 0;
-
-unwind:
- STRIPE_STACK_UNWIND(rename, frame, -1, op_errno, buf, preoldparent,
- postoldparent, prenewparent, postnewparent, NULL);
- return 0;
-}
-
-int32_t
-stripe_rename(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
- dict_t *xdata)
-{
- stripe_private_t *priv = NULL;
- stripe_local_t *local = NULL;
- xlator_list_t *trav = NULL;
- stripe_fd_ctx_t *fctx = NULL;
- int32_t op_errno = EINVAL;
-
- VALIDATE_OR_GOTO(frame, err);
- VALIDATE_OR_GOTO(this, err);
- VALIDATE_OR_GOTO(oldloc, err);
- VALIDATE_OR_GOTO(oldloc->path, err);
- VALIDATE_OR_GOTO(oldloc->inode, err);
- VALIDATE_OR_GOTO(newloc, err);
-
- priv = this->private;
- trav = this->children;
-
- /* If any one node is down, don't allow rename */
- if (priv->nodes_down) {
- op_errno = ENOTCONN;
- goto err;
- }
-
- /* Initialization */
- local = mem_get0(this->local_pool);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
-
- frame->local = local;
-
- local->op_ret = -1;
- loc_copy(&local->loc, oldloc);
- loc_copy(&local->loc2, newloc);
-
- local->call_count = priv->child_count;
-
- if (IA_ISREG(oldloc->inode->ia_type)) {
- inode_ctx_get(oldloc->inode, this, (uint64_t *)&fctx);
- if (!fctx)
- goto err;
- local->fctx = fctx;
- }
-
- STACK_WIND(frame, stripe_first_rename_cbk, trav->xlator,
- trav->xlator->fops->rename, oldloc, newloc, NULL);
-
- return 0;
-err:
- STRIPE_STACK_UNWIND(rename, frame, -1, op_errno, NULL, NULL, NULL, NULL,
- NULL, NULL);
- return 0;
-}
-int32_t
-stripe_first_unlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- struct iatt *preparent, struct iatt *postparent,
- dict_t *xdata)
-{
- stripe_local_t *local = NULL;
- call_frame_t *prev = NULL;
-
- if (!this || !frame || !frame->local || !cookie) {
- gf_log("stripe", GF_LOG_DEBUG, "possible NULL deref");
- goto out;
- }
-
- prev = cookie;
- local = frame->local;
-
- if (op_ret == -1) {
- gf_log(this->name, GF_LOG_DEBUG, "%s returned %s", prev->this->name,
- strerror(op_errno));
- goto out;
- }
- local->op_ret = 0;
- local->preparent = *preparent;
- local->postparent = *postparent;
- local->preparent_blocks += preparent->ia_blocks;
- local->postparent_blocks += postparent->ia_blocks;
-
- STRIPE_STACK_UNWIND(unlink, frame, local->op_ret, local->op_errno,
- &local->preparent, &local->postparent, xdata);
- return 0;
-out:
- STRIPE_STACK_UNWIND(unlink, frame, -1, op_errno, NULL, NULL, NULL);
-
- return 0;
-}
-
-int32_t
-stripe_unlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
-{
- int32_t callcnt = 0;
- stripe_local_t *local = NULL;
- call_frame_t *prev = NULL;
-
- if (!this || !frame || !frame->local || !cookie) {
- gf_log("stripe", GF_LOG_DEBUG, "possible NULL deref");
- goto out;
- }
-
- prev = cookie;
- local = frame->local;
-
- LOCK(&frame->lock);
- {
- callcnt = --local->call_count;
-
- if (op_ret == -1) {
- gf_log(this->name, GF_LOG_DEBUG, "%s returned %s", prev->this->name,
- strerror(op_errno));
- local->op_errno = op_errno;
- if (op_errno != ENOENT) {
- local->failed = 1;
- local->op_ret = op_ret;
- }
- }
- }
- UNLOCK(&frame->lock);
-
- if (callcnt == 1) {
- if (local->failed) {
- op_errno = local->op_errno;
- goto out;
- }
- STACK_WIND(frame, stripe_first_unlink_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->unlink, &local->loc, local->xflag,
- local->xdata);
- }
- return 0;
-out:
- STRIPE_STACK_UNWIND(unlink, frame, -1, op_errno, NULL, NULL, NULL);
-
- return 0;
-}
-
-int32_t
-stripe_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag,
- dict_t *xdata)
-{
- xlator_list_t *trav = NULL;
- stripe_local_t *local = NULL;
- stripe_private_t *priv = NULL;
- int32_t op_errno = EINVAL;
-
- VALIDATE_OR_GOTO(frame, err);
- VALIDATE_OR_GOTO(this, err);
- VALIDATE_OR_GOTO(loc, err);
- VALIDATE_OR_GOTO(loc->path, err);
- VALIDATE_OR_GOTO(loc->inode, err);
-
- priv = this->private;
- trav = this->children;
-
- if (priv->first_child_down) {
- op_errno = ENOTCONN;
- goto err;
- }
-
- /* Don't unlink a file if a node is down */
- if (priv->nodes_down) {
- op_errno = ENOTCONN;
- goto err;
- }
-
- /* Initialization */
- local = mem_get0(this->local_pool);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
- local->op_ret = -1;
- loc_copy(&local->loc, loc);
- local->xflag = xflag;
-
- if (xdata)
- local->xdata = dict_ref(xdata);
-
- frame->local = local;
- local->call_count = priv->child_count;
- trav = trav->next; /* Skip the first child */
-
- while (trav) {
- STACK_WIND(frame, stripe_unlink_cbk, trav->xlator,
- trav->xlator->fops->unlink, loc, xflag, xdata);
- trav = trav->next;
- }
-
- return 0;
-err:
- STRIPE_STACK_UNWIND(unlink, frame, -1, op_errno, NULL, NULL, NULL);
- return 0;
-}
-
-int32_t
-stripe_first_rmdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
-{
- stripe_local_t *local = NULL;
-
- if (!this || !frame || !frame->local) {
- gf_log("stripe", GF_LOG_DEBUG, "possible NULL deref");
- op_errno = EINVAL;
- goto err;
- }
-
- if (op_ret == -1) {
- goto err;
- }
-
- local = frame->local;
- local->op_ret = 0;
-
- local->call_count--; /* First child successful */
-
- local->preparent = *preparent;
- local->postparent = *postparent;
- local->preparent_size = preparent->ia_size;
- local->postparent_size = postparent->ia_size;
- local->preparent_blocks += preparent->ia_blocks;
- local->postparent_blocks += postparent->ia_blocks;
-
- STRIPE_STACK_UNWIND(rmdir, frame, local->op_ret, local->op_errno,
- &local->preparent, &local->postparent, xdata);
- return 0;
-err:
- STRIPE_STACK_UNWIND(rmdir, frame, op_ret, op_errno, NULL, NULL, NULL);
- return 0;
-}
-
-int32_t
-stripe_rmdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
-{
- int32_t callcnt = 0;
- stripe_local_t *local = NULL;
- call_frame_t *prev = NULL;
-
- if (!this || !frame || !frame->local || !cookie) {
- gf_log("stripe", GF_LOG_DEBUG, "possible NULL deref");
- goto out;
- }
-
- prev = cookie;
- local = frame->local;
-
- LOCK(&frame->lock);
- {
- callcnt = --local->call_count;
-
- if (op_ret == -1) {
- gf_log(this->name, GF_LOG_DEBUG, "%s returned %s", prev->this->name,
- strerror(op_errno));
- if (op_errno != ENOENT)
- local->failed = 1;
- }
- }
- UNLOCK(&frame->lock);
-
- if (callcnt == 1) {
- if (local->failed)
- goto out;
- STACK_WIND(frame, stripe_first_rmdir_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->rmdir, &local->loc, local->flags,
- NULL);
- }
- return 0;
-out:
- STRIPE_STACK_UNWIND(rmdir, frame, -1, op_errno, NULL, NULL, NULL);
- return 0;
-}
-
-int32_t
-stripe_rmdir(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags,
- dict_t *xdata)
-{
- xlator_list_t *trav = NULL;
- stripe_local_t *local = NULL;
- stripe_private_t *priv = NULL;
- int32_t op_errno = EINVAL;
-
- VALIDATE_OR_GOTO(frame, err);
- VALIDATE_OR_GOTO(this, err);
- VALIDATE_OR_GOTO(loc, err);
- VALIDATE_OR_GOTO(loc->path, err);
- VALIDATE_OR_GOTO(loc->inode, err);
-
- priv = this->private;
- trav = this->children;
-
- /* don't delete a directory if any of the subvolume is down */
- if (priv->nodes_down) {
- op_errno = ENOTCONN;
- goto err;
- }
-
- /* Initialization */
- local = mem_get0(this->local_pool);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
- local->op_ret = -1;
- frame->local = local;
- loc_copy(&local->loc, loc);
- local->flags = flags;
- local->call_count = priv->child_count;
- trav = trav->next; /* skip the first child */
-
- while (trav) {
- STACK_WIND(frame, stripe_rmdir_cbk, trav->xlator,
- trav->xlator->fops->rmdir, loc, flags, NULL);
- trav = trav->next;
- }
-
- return 0;
-err:
- STRIPE_STACK_UNWIND(rmdir, frame, -1, op_errno, NULL, NULL, NULL);
- return 0;
-}
-
-int32_t
-stripe_mknod_ifreg_fail_unlink_cbk(call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret,
- int32_t op_errno, struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
-{
- int32_t callcnt = 0;
- stripe_local_t *local = NULL;
-
- if (!this || !frame || !frame->local) {
- gf_log("stripe", GF_LOG_DEBUG, "possible NULL deref");
- goto out;
- }
-
- local = frame->local;
-
- LOCK(&frame->lock);
- {
- callcnt = --local->call_count;
- }
- UNLOCK(&frame->lock);
-
- if (!callcnt) {
- STRIPE_STACK_UNWIND(mknod, frame, local->op_ret, local->op_errno,
- local->inode, &local->stbuf, &local->preparent,
- &local->postparent, NULL);
- }
-out:
- return 0;
-}
-
-/**
- */
-int32_t
-stripe_mknod_ifreg_setxattr_cbk(call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret,
- int32_t op_errno, dict_t *xdata)
-{
- int32_t callcnt = 0;
- stripe_local_t *local = NULL;
- stripe_private_t *priv = NULL;
- xlator_list_t *trav = NULL;
- call_frame_t *prev = NULL;
-
- if (!this || !frame || !frame->local || !cookie) {
- gf_log("stripe", GF_LOG_DEBUG, "possible NULL deref");
- goto out;
- }
-
- prev = cookie;
- priv = this->private;
- local = frame->local;
-
- LOCK(&frame->lock);
- {
- callcnt = --local->call_count;
-
- if (op_ret == -1) {
- gf_log(this->name, GF_LOG_DEBUG, "%s returned error %s",
- prev->this->name, strerror(op_errno));
- local->op_ret = -1;
- local->op_errno = op_errno;
- }
- }
- UNLOCK(&frame->lock);
-
- if (!callcnt) {
- if (local->op_ret == -1) {
- local->call_count = priv->child_count;
- while (trav) {
- STACK_WIND(frame, stripe_mknod_ifreg_fail_unlink_cbk,
- trav->xlator, trav->xlator->fops->unlink,
- &local->loc, 0, NULL);
- trav = trav->next;
- }
- return 0;
- }
-
- STRIPE_STACK_UNWIND(mknod, frame, local->op_ret, local->op_errno,
- local->inode, &local->stbuf, &local->preparent,
- &local->postparent, NULL);
- }
-out:
- return 0;
-}
-
-int32_t
-stripe_mknod_ifreg_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
-{
- int32_t callcnt = 0;
- stripe_local_t *local = NULL;
- stripe_private_t *priv = NULL;
- call_frame_t *prev = NULL;
- xlator_list_t *trav = NULL;
-
- if (!this || !frame || !frame->local || !cookie) {
- gf_log("stripe", GF_LOG_DEBUG, "possible NULL deref");
- goto out;
- }
-
- prev = cookie;
- priv = this->private;
- local = frame->local;
-
- LOCK(&frame->lock);
- {
- callcnt = --local->call_count;
-
- if (op_ret == -1) {
- gf_log(this->name, GF_LOG_DEBUG, "%s returned error %s",
- prev->this->name, strerror(op_errno));
- if ((op_errno != ENOENT) || (prev->this == FIRST_CHILD(this)))
- local->failed = 1;
- local->op_errno = op_errno;
- }
- if (op_ret >= 0) {
- local->op_ret = op_ret;
-
- /* Can be used as a mechanism to understand if mknod
- was successful in at least one place */
- if (gf_uuid_is_null(local->ia_gfid))
- gf_uuid_copy(local->ia_gfid, buf->ia_gfid);
-
- if (stripe_ctx_handle(this, prev, local, xdata))
- gf_log(this->name, GF_LOG_ERROR,
- "Error getting fctx info from dict");
-
- local->stbuf_blocks += buf->ia_blocks;
- local->preparent_blocks += preparent->ia_blocks;
- local->postparent_blocks += postparent->ia_blocks;
-
- correct_file_size(buf, local->fctx, prev);
-
- if (local->stbuf_size < buf->ia_size)
- local->stbuf_size = buf->ia_size;
- if (local->preparent_size < preparent->ia_size)
- local->preparent_size = preparent->ia_size;
- if (local->postparent_size < postparent->ia_size)
- local->postparent_size = postparent->ia_size;
- }
- }
- UNLOCK(&frame->lock);
-
- if (!callcnt) {
- if (local->failed)
- local->op_ret = -1;
-
- if ((local->op_ret == -1) && !gf_uuid_is_null(local->ia_gfid)) {
- /* ia_gfid set means, at least on one node 'mknod'
- is successful */
- local->call_count = priv->child_count;
- trav = this->children;
- while (trav) {
- STACK_WIND(frame, stripe_mknod_ifreg_fail_unlink_cbk,
- trav->xlator, trav->xlator->fops->unlink,
- &local->loc, 0, NULL);
- trav = trav->next;
- }
- return 0;
- }
-
- if (local->op_ret != -1) {
- local->preparent.ia_blocks = local->preparent_blocks;
- local->preparent.ia_size = local->preparent_size;
- local->postparent.ia_blocks = local->postparent_blocks;
- local->postparent.ia_size = local->postparent_size;
- local->stbuf.ia_size = local->stbuf_size;
- local->stbuf.ia_blocks = local->stbuf_blocks;
- inode_ctx_put(local->inode, this, (uint64_t)(long)local->fctx);
- }
- STRIPE_STACK_UNWIND(mknod, frame, local->op_ret, local->op_errno,
- local->inode, &local->stbuf, &local->preparent,
- &local->postparent, NULL);
- }
-out:
- return 0;
-}
-
-int32_t
-stripe_mknod_first_ifreg_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
-{
- stripe_local_t *local = NULL;
- stripe_private_t *priv = NULL;
- call_frame_t *prev = NULL;
- xlator_list_t *trav = NULL;
- int i = 1;
- dict_t *dict = NULL;
- int ret = 0;
- int need_unref = 0;
-
- if (!this || !frame || !frame->local || !cookie) {
- gf_log("stripe", GF_LOG_DEBUG, "possible NULL deref");
- goto out;
- }
-
- prev = cookie;
- priv = this->private;
- local = frame->local;
- trav = this->children;
-
- local->call_count--;
-
- if (op_ret == -1) {
- gf_log(this->name, GF_LOG_DEBUG, "%s returned error %s",
- prev->this->name, strerror(op_errno));
- local->failed = 1;
- local->op_errno = op_errno;
- goto out;
- }
-
- local->op_ret = op_ret;
-
- local->stbuf = *buf;
- local->preparent = *preparent;
- local->postparent = *postparent;
-
- if (gf_uuid_is_null(local->ia_gfid))
- gf_uuid_copy(local->ia_gfid, buf->ia_gfid);
- local->preparent.ia_blocks = local->preparent_blocks;
- local->preparent.ia_size = local->preparent_size;
- local->postparent.ia_blocks = local->postparent_blocks;
- local->postparent.ia_size = local->postparent_size;
- local->stbuf.ia_size = local->stbuf_size;
- local->stbuf.ia_blocks = local->stbuf_blocks;
-
- trav = trav->next;
- while (trav) {
- if (priv->xattr_supported) {
- dict = dict_new();
- if (!dict) {
- gf_log(this->name, GF_LOG_ERROR, "failed to allocate dict %s",
- local->loc.path);
- }
- need_unref = 1;
-
- dict_copy(local->xattr, dict);
-
- ret = stripe_xattr_request_build(this, dict, local->stripe_size,
- priv->child_count, i,
- priv->coalesce);
- if (ret)
- gf_log(this->name, GF_LOG_ERROR,
- "Failed to build xattr request");
-
- } else {
- dict = local->xattr;
- }
-
- STACK_WIND(frame, stripe_mknod_ifreg_cbk, trav->xlator,
- trav->xlator->fops->mknod, &local->loc, local->mode,
- local->rdev, 0, dict);
- trav = trav->next;
- i++;
-
- if (dict && need_unref)
- dict_unref(dict);
- }
-
- return 0;
-
-out:
-
- STRIPE_STACK_UNWIND(mknod, frame, op_ret, op_errno, NULL, NULL, NULL, NULL,
- NULL);
- return 0;
-}
-
-int32_t
-stripe_single_mknod_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
-{
- STRIPE_STACK_UNWIND(mknod, frame, op_ret, op_errno, inode, buf, preparent,
- postparent, xdata);
- return 0;
-}
-
-int
-stripe_mknod(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
- dev_t rdev, mode_t umask, dict_t *xdata)
-{
- stripe_private_t *priv = NULL;
- stripe_local_t *local = NULL;
- int32_t op_errno = EINVAL;
- int32_t i = 0;
- dict_t *dict = NULL;
- int ret = 0;
- int need_unref = 0;
-
- VALIDATE_OR_GOTO(frame, err);
- VALIDATE_OR_GOTO(this, err);
- VALIDATE_OR_GOTO(loc, err);
- VALIDATE_OR_GOTO(loc->path, err);
- VALIDATE_OR_GOTO(loc->inode, err);
-
- priv = this->private;
-
- if (priv->first_child_down) {
- op_errno = ENOTCONN;
- goto err;
- }
-
- if (S_ISREG(mode)) {
- /* NOTE: on older kernels (older than 2.6.9),
- creat() fops is sent as mknod() + open(). Hence handling
- S_IFREG files is necessary */
- if (priv->nodes_down) {
- gf_log(this->name, GF_LOG_WARNING, "Some node down, returning EIO");
- op_errno = EIO;
- goto err;
- }
-
- /* Initialization */
- local = mem_get0(this->local_pool);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
- local->op_ret = -1;
- local->op_errno = ENOTCONN;
- local->stripe_size = stripe_get_matching_bs(loc->path, priv);
- frame->local = local;
- local->inode = inode_ref(loc->inode);
- loc_copy(&local->loc, loc);
- local->xattr = dict_copy_with_ref(xdata, NULL);
- local->mode = mode;
- local->umask = umask;
- local->rdev = rdev;
-
- /* Every time in stripe lookup, all child nodes should
- be looked up */
- local->call_count = priv->child_count;
-
- if (priv->xattr_supported) {
- dict = dict_new();
- if (!dict) {
- gf_log(this->name, GF_LOG_ERROR, "failed to allocate dict %s",
- loc->path);
- }
- need_unref = 1;
-
- dict_copy(xdata, dict);
-
- ret = stripe_xattr_request_build(this, dict, local->stripe_size,
- priv->child_count, i,
- priv->coalesce);
- if (ret)
- gf_log(this->name, GF_LOG_ERROR,
- "failed to build xattr request");
- } else {
- dict = xdata;
- }
-
- STACK_WIND(frame, stripe_mknod_first_ifreg_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->mknod, loc, mode, rdev, umask,
- dict);
-
- if (dict && need_unref)
- dict_unref(dict);
- return 0;
- }
-
- STACK_WIND(frame, stripe_single_mknod_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->mknod, loc, mode, rdev, umask, xdata);
-
- return 0;
-err:
- STRIPE_STACK_UNWIND(mknod, frame, -1, op_errno, NULL, NULL, NULL, NULL,
- NULL);
- return 0;
-}
-
-int32_t
-stripe_mkdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
-{
- int32_t callcnt = 0;
- stripe_local_t *local = NULL;
- call_frame_t *prev = NULL;
-
- if (!this || !frame || !frame->local || !cookie) {
- gf_log("stripe", GF_LOG_DEBUG, "possible NULL deref");
- goto out;
- }
-
- prev = cookie;
- local = frame->local;
-
- LOCK(&frame->lock);
- {
- callcnt = --local->call_count;
-
- if (op_ret == -1) {
- gf_log(this->name, GF_LOG_DEBUG, "%s returned error %s",
- prev->this->name, strerror(op_errno));
- local->op_errno = op_errno;
- if ((op_errno != ENOENT) || (prev->this == FIRST_CHILD(this)))
- local->failed = 1;
- }
-
- if (op_ret >= 0) {
- local->op_ret = 0;
-
- local->stbuf_blocks += buf->ia_blocks;
- local->preparent_blocks += preparent->ia_blocks;
- local->postparent_blocks += postparent->ia_blocks;
-
- if (local->stbuf_size < buf->ia_size)
- local->stbuf_size = buf->ia_size;
- if (local->preparent_size < preparent->ia_size)
- local->preparent_size = preparent->ia_size;
- if (local->postparent_size < postparent->ia_size)
- local->postparent_size = postparent->ia_size;
- }
- }
- UNLOCK(&frame->lock);
-
- if (!callcnt) {
- if (local->failed != -1) {
- local->preparent.ia_blocks = local->preparent_blocks;
- local->preparent.ia_size = local->preparent_size;
- local->postparent.ia_blocks = local->postparent_blocks;
- local->postparent.ia_size = local->postparent_size;
- local->stbuf.ia_size = local->stbuf_size;
- local->stbuf.ia_blocks = local->stbuf_blocks;
- }
- STRIPE_STACK_UNWIND(mkdir, frame, local->op_ret, local->op_errno,
- local->inode, &local->stbuf, &local->preparent,
- &local->postparent, NULL);
- }
-out:
- return 0;
-}
-
-int32_t
-stripe_first_mkdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
-{
- stripe_local_t *local = NULL;
- call_frame_t *prev = NULL;
- xlator_list_t *trav = NULL;
-
- if (!this || !frame || !frame->local || !cookie) {
- gf_log("stripe", GF_LOG_DEBUG, "possible NULL deref");
- goto out;
- }
-
- prev = cookie;
- local = frame->local;
- trav = this->children;
-
- local->call_count--; /* first child is successful */
- trav = trav->next; /* skip first child */
-
- if (op_ret == -1) {
- gf_log(this->name, GF_LOG_DEBUG, "%s returned error %s",
- prev->this->name, strerror(op_errno));
- local->op_errno = op_errno;
- goto out;
- }
-
- local->op_ret = 0;
-
- local->inode = inode_ref(inode);
- local->stbuf = *buf;
- local->postparent = *postparent;
- local->preparent = *preparent;
-
- local->stbuf_blocks += buf->ia_blocks;
- local->preparent_blocks += preparent->ia_blocks;
- local->postparent_blocks += postparent->ia_blocks;
-
- local->stbuf_size = buf->ia_size;
- local->preparent_size = preparent->ia_size;
- local->postparent_size = postparent->ia_size;
-
- while (trav) {
- STACK_WIND(frame, stripe_mkdir_cbk, trav->xlator,
- trav->xlator->fops->mkdir, &local->loc, local->mode,
- local->umask, local->xdata);
- trav = trav->next;
- }
- return 0;
-out:
- STRIPE_STACK_UNWIND(mkdir, frame, -1, op_errno, NULL, NULL, NULL, NULL,
- NULL);
-
- return 0;
-}
-
-int
-stripe_mkdir(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
- mode_t umask, dict_t *xdata)
-{
- stripe_private_t *priv = NULL;
- stripe_local_t *local = NULL;
- xlator_list_t *trav = NULL;
- int32_t op_errno = 1;
-
- VALIDATE_OR_GOTO(frame, err);
- VALIDATE_OR_GOTO(this, err);
- VALIDATE_OR_GOTO(loc, err);
- VALIDATE_OR_GOTO(loc->path, err);
- VALIDATE_OR_GOTO(loc->inode, err);
-
- priv = this->private;
- trav = this->children;
-
- if (priv->first_child_down) {
- op_errno = ENOTCONN;
- goto err;
- }
-
- /* Initialization */
- local = mem_get0(this->local_pool);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
- local->op_ret = -1;
- local->call_count = priv->child_count;
- if (xdata)
- local->xdata = dict_ref(xdata);
- local->mode = mode;
- local->umask = umask;
- loc_copy(&local->loc, loc);
- frame->local = local;
-
- /* Every time in stripe lookup, all child nodes should be looked up */
- STACK_WIND(frame, stripe_first_mkdir_cbk, trav->xlator,
- trav->xlator->fops->mkdir, loc, mode, umask, xdata);
-
- return 0;
-err:
- STRIPE_STACK_UNWIND(mkdir, frame, -1, op_errno, NULL, NULL, NULL, NULL,
- NULL);
- return 0;
-}
-
-int32_t
-stripe_link_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
-{
- int32_t callcnt = 0;
- stripe_local_t *local = NULL;
- call_frame_t *prev = NULL;
- stripe_fd_ctx_t *fctx = NULL;
-
- if (!this || !frame || !frame->local || !cookie) {
- gf_log("stripe", GF_LOG_DEBUG, "possible NULL deref");
- goto out;
- }
-
- prev = cookie;
- local = frame->local;
-
- LOCK(&frame->lock);
- {
- callcnt = --local->call_count;
-
- if (op_ret == -1) {
- gf_log(this->name, GF_LOG_DEBUG, "%s returned error %s",
- prev->this->name, strerror(op_errno));
- local->op_errno = op_errno;
- if ((op_errno != ENOENT) || (prev->this == FIRST_CHILD(this)))
- local->failed = 1;
- }
-
- if (op_ret >= 0) {
- local->op_ret = 0;
-
- if (IA_ISREG(inode->ia_type)) {
- inode_ctx_get(inode, this, (uint64_t *)&fctx);
- if (!fctx) {
- gf_log(this->name, GF_LOG_ERROR,
- "failed to get stripe context");
- op_ret = -1;
- op_errno = EINVAL;
- }
- }
-
- if (FIRST_CHILD(this) == prev->this) {
- local->inode = inode_ref(inode);
- local->stbuf = *buf;
- local->postparent = *postparent;
- local->preparent = *preparent;
- }
- local->stbuf_blocks += buf->ia_blocks;
- local->preparent_blocks += preparent->ia_blocks;
- local->postparent_blocks += postparent->ia_blocks;
-
- correct_file_size(buf, fctx, prev);
-
- if (local->stbuf_size < buf->ia_size)
- local->stbuf_size = buf->ia_size;
- if (local->preparent_size < preparent->ia_size)
- local->preparent_size = preparent->ia_size;
- if (local->postparent_size < postparent->ia_size)
- local->postparent_size = postparent->ia_size;
- }
- }
- UNLOCK(&frame->lock);
-
- if (!callcnt) {
- if (local->failed)
- local->op_ret = -1;
-
- if (local->op_ret != -1) {
- local->preparent.ia_blocks = local->preparent_blocks;
- local->preparent.ia_size = local->preparent_size;
- local->postparent.ia_blocks = local->postparent_blocks;
- local->postparent.ia_size = local->postparent_size;
- local->stbuf.ia_size = local->stbuf_size;
- local->stbuf.ia_blocks = local->stbuf_blocks;
- }
- STRIPE_STACK_UNWIND(link, frame, local->op_ret, local->op_errno,
- local->inode, &local->stbuf, &local->preparent,
- &local->postparent, NULL);
- }
-out:
- return 0;
-}
-
-int32_t
-stripe_link(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
- dict_t *xdata)
-{
- xlator_list_t *trav = NULL;
- stripe_local_t *local = NULL;
- stripe_private_t *priv = NULL;
- int32_t op_errno = 1;
-
- VALIDATE_OR_GOTO(frame, err);
- VALIDATE_OR_GOTO(this, err);
- VALIDATE_OR_GOTO(oldloc, err);
- VALIDATE_OR_GOTO(oldloc->path, err);
- VALIDATE_OR_GOTO(oldloc->inode, err);
-
- priv = this->private;
- trav = this->children;
-
- /* If any one node is down, don't allow link operation */
- if (priv->nodes_down) {
- op_errno = ENOTCONN;
- goto err;
- }
-
- /* Initialization */
- local = mem_get0(this->local_pool);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
- local->op_ret = -1;
- frame->local = local;
- local->call_count = priv->child_count;
-
- /* Every time in stripe lookup, all child
- nodes should be looked up */
- while (trav) {
- STACK_WIND(frame, stripe_link_cbk, trav->xlator,
- trav->xlator->fops->link, oldloc, newloc, NULL);
- trav = trav->next;
- }
-
- return 0;
-err:
- STRIPE_STACK_UNWIND(link, frame, -1, op_errno, NULL, NULL, NULL, NULL,
- NULL);
- return 0;
-}
-
-int32_t
-stripe_create_fail_unlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- struct iatt *preparent, struct iatt *postparent,
- dict_t *xdata)
-{
- int32_t callcnt = 0;
- stripe_local_t *local = NULL;
-
- if (!this || !frame || !frame->local) {
- gf_log("stripe", GF_LOG_DEBUG, "possible NULL deref");
- goto out;
- }
-
- local = frame->local;
-
- LOCK(&frame->lock);
- {
- callcnt = --local->call_count;
- }
- UNLOCK(&frame->lock);
-
- if (!callcnt) {
- STRIPE_STACK_UNWIND(create, frame, local->op_ret, local->op_errno,
- local->fd, local->inode, &local->stbuf,
- &local->preparent, &local->postparent, NULL);
- }
-out:
- return 0;
-}
-
-int32_t
-stripe_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, fd_t *fd, inode_t *inode,
- struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
-{
- int32_t callcnt = 0;
- stripe_local_t *local = NULL;
- stripe_private_t *priv = NULL;
- call_frame_t *prev = NULL;
- xlator_list_t *trav = NULL;
-
- if (!this || !frame || !frame->local || !cookie) {
- gf_log("stripe", GF_LOG_DEBUG, "possible NULL deref");
- goto out;
- }
-
- prev = cookie;
- priv = this->private;
- local = frame->local;
-
- LOCK(&frame->lock);
- {
- callcnt = --local->call_count;
-
- if (op_ret == -1) {
- gf_log(this->name, GF_LOG_DEBUG, "%s returned error %s",
- prev->this->name, strerror(op_errno));
- local->failed = 1;
- local->op_errno = op_errno;
- }
-
- if (op_ret >= 0) {
- if (IA_ISREG(buf->ia_type)) {
- if (stripe_ctx_handle(this, prev, local, xdata))
- gf_log(this->name, GF_LOG_ERROR,
- "Error getting fctx info from "
- "dict");
- }
-
- local->op_ret = op_ret;
-
- local->stbuf_blocks += buf->ia_blocks;
- local->preparent_blocks += preparent->ia_blocks;
- local->postparent_blocks += postparent->ia_blocks;
-
- correct_file_size(buf, local->fctx, prev);
-
- if (local->stbuf_size < buf->ia_size)
- local->stbuf_size = buf->ia_size;
- if (local->preparent_size < preparent->ia_size)
- local->preparent_size = preparent->ia_size;
- if (local->postparent_size < postparent->ia_size)
- local->postparent_size = postparent->ia_size;
- }
- }
- UNLOCK(&frame->lock);
-
- if (!callcnt) {
- if (local->failed)
- local->op_ret = -1;
-
- if (local->op_ret == -1) {
- local->call_count = priv->child_count;
- trav = this->children;
- while (trav) {
- STACK_WIND(frame, stripe_create_fail_unlink_cbk, trav->xlator,
- trav->xlator->fops->unlink, &local->loc, 0, NULL);
- trav = trav->next;
- }
-
- return 0;
- }
-
- if (local->op_ret >= 0) {
- local->preparent.ia_blocks = local->preparent_blocks;
- local->preparent.ia_size = local->preparent_size;
- local->postparent.ia_blocks = local->postparent_blocks;
- local->postparent.ia_size = local->postparent_size;
- local->stbuf.ia_size = local->stbuf_size;
- local->stbuf.ia_blocks = local->stbuf_blocks;
-
- stripe_copy_xl_array(local->fctx->xl_array, priv->xl_array,
- local->fctx->stripe_count);
- inode_ctx_put(local->inode, this, (uint64_t)(uintptr_t)local->fctx);
- }
-
- /* Create itself has failed.. so return
- without setxattring */
- STRIPE_STACK_UNWIND(create, frame, local->op_ret, local->op_errno,
- local->fd, local->inode, &local->stbuf,
- &local->preparent, &local->postparent, NULL);
- }
-
-out:
- return 0;
-}
-
-int32_t
-stripe_first_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, fd_t *fd,
- inode_t *inode, struct iatt *buf,
- struct iatt *preparent, struct iatt *postparent,
- dict_t *xdata)
-{
- stripe_local_t *local = NULL;
- stripe_private_t *priv = NULL;
- call_frame_t *prev = NULL;
- xlator_list_t *trav = NULL;
- int i = 1;
- dict_t *dict = NULL;
- loc_t *loc = NULL;
- int32_t need_unref = 0;
- int32_t ret = -1;
-
- if (!this || !frame || !frame->local || !cookie) {
- gf_log("stripe", GF_LOG_DEBUG, "possible NULL deref");
- goto out;
- }
-
- prev = cookie;
- priv = this->private;
- local = frame->local;
- trav = this->children;
- loc = &local->loc;
-
- --local->call_count;
-
- if (op_ret == -1) {
- gf_log(this->name, GF_LOG_DEBUG, "%s returned error %s",
- prev->this->name, strerror(op_errno));
- local->failed = 1;
- local->op_errno = op_errno;
- }
-
- local->op_ret = 0;
- /* Get the mapping in inode private */
- /* Get the stat buf right */
- local->stbuf = *buf;
- local->preparent = *preparent;
- local->postparent = *postparent;
-
- local->stbuf_blocks += buf->ia_blocks;
- local->preparent_blocks += preparent->ia_blocks;
- local->postparent_blocks += postparent->ia_blocks;
-
- if (local->stbuf_size < buf->ia_size)
- local->stbuf_size = buf->ia_size;
- if (local->preparent_size < preparent->ia_size)
- local->preparent_size = preparent->ia_size;
- if (local->postparent_size < postparent->ia_size)
- local->postparent_size = postparent->ia_size;
-
- if (local->failed)
- local->op_ret = -1;
-
- if (local->op_ret == -1) {
- local->call_count = 1;
- STACK_WIND(frame, stripe_create_fail_unlink_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->unlink, &local->loc, 0, NULL);
- return 0;
- }
-
- if (local->op_ret >= 0) {
- local->preparent.ia_blocks = local->preparent_blocks;
- local->preparent.ia_size = local->preparent_size;
- local->postparent.ia_blocks = local->postparent_blocks;
- local->postparent.ia_size = local->postparent_size;
- local->stbuf.ia_size = local->stbuf_size;
- local->stbuf.ia_blocks = local->stbuf_blocks;
- }
-
- /* Send a setxattr request to nodes where the
- files are created */
- trav = trav->next;
- while (trav) {
- if (priv->xattr_supported) {
- dict = dict_new();
- if (!dict) {
- gf_log(this->name, GF_LOG_ERROR, "failed to allocate dict %s",
- loc->path);
- }
- need_unref = 1;
-
- dict_copy(local->xattr, dict);
-
- ret = stripe_xattr_request_build(this, dict, local->stripe_size,
- priv->child_count, i,
- priv->coalesce);
- if (ret)
- gf_log(this->name, GF_LOG_ERROR,
- "failed to build xattr request");
- } else {
- dict = local->xattr;
- }
-
- STACK_WIND(frame, stripe_create_cbk, trav->xlator,
- trav->xlator->fops->create, &local->loc, local->flags,
- local->mode, local->umask, local->fd, dict);
- trav = trav->next;
- if (need_unref && dict)
- dict_unref(dict);
- i++;
- }
-
-out:
- return 0;
-}
-
-/**
- * stripe_create - If a block-size is specified for the 'name', create the
- * file in all the child nodes. If not, create it in only first child.
- *
- * @name- complete path of the file to be created.
- */
-int32_t
-stripe_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
-              mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata)
-{
-    stripe_private_t *priv = NULL;
-    stripe_local_t *local = NULL;
-    int32_t op_errno = EINVAL;
-    int ret = 0;
-    int need_unref = 0;
-    dict_t *dict = NULL;
-
-    VALIDATE_OR_GOTO(frame, err);
-    VALIDATE_OR_GOTO(this, err);
-    VALIDATE_OR_GOTO(loc, err);
-    VALIDATE_OR_GOTO(loc->path, err);
-    VALIDATE_OR_GOTO(loc->inode, err);
-
-    priv = this->private;
-
-    /* files created in O_APPEND mode does not allow lseek() on fd */
-    flags &= ~O_APPEND;
-
-    /* Refuse the create when either down-flag is set in the private data. */
-    if (priv->first_child_down || priv->nodes_down) {
-        gf_log(this->name, GF_LOG_DEBUG, "First node down, returning EIO");
-        op_errno = EIO;
-        goto err;
-    }
-
-    /* Initialization */
-    local = mem_get0(this->local_pool);
-    if (!local) {
-        op_errno = ENOMEM;
-        goto err;
-    }
-    local->op_ret = -1;
-    local->op_errno = ENOTCONN;
-    local->stripe_size = stripe_get_matching_bs(loc->path, priv);
-    frame->local = local;
-    local->inode = inode_ref(loc->inode);
-    loc_copy(&local->loc, loc);
-    local->fd = fd_ref(fd);
-    local->flags = flags;
-    local->mode = mode;
-    local->umask = umask;
-    if (xdata)
-        local->xattr = dict_ref(xdata);
-
-    local->call_count = priv->child_count;
-
-    /* When xattrs are supported, send a private copy of the caller's xdata
-     * extended with the stripe xattr request; otherwise forward xdata as is. */
-    if (priv->xattr_supported) {
-        dict = dict_new();
-        if (!dict) {
-            gf_log(this->name, GF_LOG_ERROR, "failed to allocate dict %s",
-                   loc->path);
-            /* Do not carry on with a NULL dict: the copy and the xattr
-             * request below would operate on nothing and the create would
-             * go out without the stripe xattrs. Fail the call instead.
-             * NOTE(review): relies on STRIPE_STACK_UNWIND cleaning up
-             * frame->local, as other fops in this file already do when
-             * they jump to err after setting frame->local. */
-            op_errno = ENOMEM;
-            goto err;
-        }
-        need_unref = 1;
-
-        dict_copy(xdata, dict);
-
-        /* Index 0: this request is wound to the first child only. */
-        ret = stripe_xattr_request_build(this, dict, local->stripe_size,
-                                         priv->child_count, 0, priv->coalesce);
-        if (ret)
-            gf_log(this->name, GF_LOG_ERROR, "failed to build xattr request");
-    } else {
-        dict = xdata;
-    }
-
-    /* Only the first child is contacted here; stripe_first_create_cbk
-     * receives its reply. */
-    STACK_WIND(frame, stripe_first_create_cbk, FIRST_CHILD(this),
-               FIRST_CHILD(this)->fops->create, loc, flags, mode, umask, fd,
-               dict);
-
-    /* Drop our reference on the private dict (never on the caller's). */
-    if (need_unref && dict)
-        dict_unref(dict);
-
-    return 0;
-err:
-    STRIPE_STACK_UNWIND(create, frame, -1, op_errno, NULL, NULL, NULL, NULL,
-                        NULL, xdata);
-    return 0;
-}
-
-/**
- * stripe_open_cbk - record one child's reply to open and unwind the call
- * once the last outstanding reply has arrived.
- *
- * A failed reply marks the whole call as failed unless it is ENOENT coming
- * from a child other than the first one.
- */
-int32_t
-stripe_open_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
-                int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata)
-{
-    stripe_local_t *local = NULL;
-    call_frame_t *prev = NULL;
-    int32_t pending = 0;
-
-    if (!this || !frame || !frame->local || !cookie) {
-        gf_log("stripe", GF_LOG_DEBUG, "possible NULL deref");
-        return 0;
-    }
-
-    local = frame->local;
-    prev = cookie;
-
-    LOCK(&frame->lock);
-    {
-        pending = --local->call_count;
-
-        if (op_ret == -1) {
-            gf_log(this->name, GF_LOG_DEBUG, "%s returned error %s",
-                   prev->this->name, strerror(op_errno));
-            if ((prev->this == FIRST_CHILD(this)) || (op_errno != ENOENT))
-                local->failed = 1;
-            local->op_errno = op_errno;
-        } else if (op_ret >= 0) {
-            local->op_ret = op_ret;
-        }
-    }
-    UNLOCK(&frame->lock);
-
-    /* Replies still outstanding: nothing more to do yet. */
-    if (pending)
-        return 0;
-
-    if (local->failed)
-        local->op_ret = -1;
-
-    STRIPE_STACK_UNWIND(open, frame, local->op_ret, local->op_errno, local->fd,
-                        xdata);
-    return 0;
-}
-
-/**
- * stripe_open - open the file on every child of this translator.
- *
- * Unwinds with ENOTCONN when the first child is flagged down, ENOMEM when
- * the per-call state cannot be allocated and EINVAL when an argument fails
- * validation. O_APPEND is stripped from @flags before they are forwarded.
- * Replies are collected by stripe_open_cbk.
- */
-int32_t
-stripe_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
-            fd_t *fd, dict_t *xdata)
-{
-    stripe_local_t *local = NULL;
-    stripe_private_t *priv = NULL;
-    xlator_list_t *trav = NULL;
-    /* Validation failures report EINVAL, as stripe_opendir() and
-     * stripe_lk() do, rather than the bare literal 1. */
-    int32_t op_errno = EINVAL;
-
-    VALIDATE_OR_GOTO(frame, err);
-    VALIDATE_OR_GOTO(this, err);
-    VALIDATE_OR_GOTO(loc, err);
-    VALIDATE_OR_GOTO(loc->path, err);
-    VALIDATE_OR_GOTO(loc->inode, err);
-
-    priv = this->private;
-    trav = this->children;
-
-    if (priv->first_child_down) {
-        op_errno = ENOTCONN;
-        goto err;
-    }
-
-    /* Initialization */
-    local = mem_get0(this->local_pool);
-    if (!local) {
-        op_errno = ENOMEM;
-        goto err;
-    }
-
-    /* files opened in O_APPEND mode does not allow lseek() on fd */
-    flags &= ~O_APPEND;
-
-    local->fd = fd_ref(fd);
-    frame->local = local;
-    loc_copy(&local->loc, loc);
-
-    /* Striped files */
-    local->flags = flags;
-    /* One reply is expected per child. */
-    local->call_count = priv->child_count;
-    local->stripe_size = stripe_get_matching_bs(loc->path, priv);
-
-    while (trav) {
-        STACK_WIND(frame, stripe_open_cbk, trav->xlator,
-                   trav->xlator->fops->open, &local->loc, local->flags,
-                   local->fd, xdata);
-        trav = trav->next;
-    }
-    return 0;
-err:
-    STRIPE_STACK_UNWIND(open, frame, -1, op_errno, NULL, NULL);
-    return 0;
-}
-
-/**
- * stripe_opendir_cbk - record one child's reply to opendir and unwind once
- * no replies remain outstanding.
- *
- * The stored result is overwritten by each reply in arrival order; the
- * unwind carries local->fd and no xdata.
- */
-int32_t
-stripe_opendir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
-                   int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata)
-{
-    stripe_local_t *local = NULL;
-    call_frame_t *prev = NULL;
-    int32_t pending = 0;
-
-    if (!this || !frame || !frame->local || !cookie) {
-        gf_log("stripe", GF_LOG_DEBUG, "possible NULL deref");
-        return 0;
-    }
-
-    local = frame->local;
-    prev = cookie;
-
-    LOCK(&frame->lock);
-    {
-        pending = --local->call_count;
-
-        if (op_ret == -1) {
-            gf_log(this->name, GF_LOG_DEBUG, "%s returned error %s",
-                   prev->this->name, strerror(op_errno));
-            local->op_errno = op_errno;
-            local->op_ret = -1;
-        } else if (op_ret >= 0) {
-            local->op_ret = op_ret;
-        }
-    }
-    UNLOCK(&frame->lock);
-
-    if (pending)
-        return 0;
-
-    STRIPE_STACK_UNWIND(opendir, frame, local->op_ret, local->op_errno,
-                        local->fd, NULL);
-    return 0;
-}
-
-/**
- * stripe_opendir - open the directory on every child.
- *
- * Unwinds with ENOTCONN when the first child is flagged down, ENOMEM when
- * the per-call state cannot be allocated, EINVAL on invalid arguments.
- * The caller's xdata is not forwarded to the children.
- */
-int32_t
-stripe_opendir(call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd,
-               dict_t *xdata)
-{
-    int32_t op_errno = EINVAL;
-    stripe_private_t *priv = NULL;
-    stripe_local_t *local = NULL;
-    xlator_list_t *child = NULL;
-
-    VALIDATE_OR_GOTO(frame, err);
-    VALIDATE_OR_GOTO(this, err);
-    VALIDATE_OR_GOTO(loc, err);
-    VALIDATE_OR_GOTO(loc->path, err);
-    VALIDATE_OR_GOTO(loc->inode, err);
-
-    priv = this->private;
-
-    if (priv->first_child_down) {
-        op_errno = ENOTCONN;
-        goto err;
-    }
-
-    /* Per-call state shared with stripe_opendir_cbk. */
-    local = mem_get0(this->local_pool);
-    if (!local) {
-        op_errno = ENOMEM;
-        goto err;
-    }
-    frame->local = local;
-    local->call_count = priv->child_count;
-    local->fd = fd_ref(fd);
-
-    for (child = this->children; child; child = child->next) {
-        STACK_WIND(frame, stripe_opendir_cbk, child->xlator,
-                   child->xlator->fops->opendir, loc, fd, NULL);
-    }
-
-    return 0;
-err:
-    STRIPE_STACK_UNWIND(opendir, frame, -1, op_errno, NULL, NULL);
-    return 0;
-}
-
-/**
- * stripe_lk_cbk - record one child's reply to lk and unwind after the last
- * outstanding reply.
- *
- * Only a successful reply from the first child supplies the returned lock
- * and op_ret. A failure marks the call failed unless it is ENOENT from a
- * child other than the first.
- */
-int32_t
-stripe_lk_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
-              int32_t op_errno, struct gf_flock *lock, dict_t *xdata)
-{
-    stripe_local_t *local = NULL;
-    call_frame_t *prev = NULL;
-    int32_t pending = 0;
-
-    if (!this || !frame || !frame->local || !cookie) {
-        gf_log("stripe", GF_LOG_DEBUG, "possible NULL deref");
-        return 0;
-    }
-
-    local = frame->local;
-    prev = cookie;
-
-    LOCK(&frame->lock);
-    {
-        pending = --local->call_count;
-
-        if (op_ret == -1) {
-            gf_log(this->name, GF_LOG_DEBUG, "%s returned error %s",
-                   prev->this->name, strerror(op_errno));
-            local->op_errno = op_errno;
-            if ((prev->this == FIRST_CHILD(this)) || (op_errno != ENOENT))
-                local->failed = 1;
-        } else if ((op_ret >= 0) && (prev->this == FIRST_CHILD(this))) {
-            /* First successful call, copy the *lock */
-            local->op_ret = op_ret;
-            local->lock = *lock;
-        }
-    }
-    UNLOCK(&frame->lock);
-
-    if (pending)
-        return 0;
-
-    if (local->failed)
-        local->op_ret = -1;
-
-    STRIPE_STACK_UNWIND(lk, frame, local->op_ret, local->op_errno, &local->lock,
-                        NULL);
-    return 0;
-}
-
-/**
- * stripe_lk - forward the lock request to every child.
- *
- * Unwinds with ENOMEM when the per-call state cannot be allocated and
- * EINVAL on invalid arguments. The caller's xdata is not forwarded.
- * Replies are collected by stripe_lk_cbk.
- */
-int32_t
-stripe_lk(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd,
-          struct gf_flock *lock, dict_t *xdata)
-{
-    int32_t op_errno = EINVAL;
-    stripe_private_t *priv = NULL;
-    stripe_local_t *local = NULL;
-    xlator_list_t *child = NULL;
-
-    VALIDATE_OR_GOTO(frame, err);
-    VALIDATE_OR_GOTO(this, err);
-    VALIDATE_OR_GOTO(fd, err);
-    VALIDATE_OR_GOTO(fd->inode, err);
-
-    priv = this->private;
-
-    /* Per-call state; op_ret stays -1 until the callback records success. */
-    local = mem_get0(this->local_pool);
-    if (!local) {
-        op_errno = ENOMEM;
-        goto err;
-    }
-    local->op_ret = -1;
-    frame->local = local;
-    local->call_count = priv->child_count;
-
-    for (child = this->children; child; child = child->next) {
-        STACK_WIND(frame, stripe_lk_cbk, child->xlator, child->xlator->fops->lk,
-                   fd, cmd, lock, NULL);
-    }
-
-    return 0;
-err:
-    STRIPE_STACK_UNWIND(lk, frame, -1, op_errno, NULL, NULL);
-    return 0;
-}
-
-/**
- * stripe_flush_cbk - record one child's reply to flush and unwind after the
- * last outstanding reply.
- *
- * A failure marks the call failed unless it is ENOENT from a child other
- * than the first.
- */
-int32_t
-stripe_flush_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
-                 int32_t op_ret, int32_t op_errno, dict_t *xdata)
-{
-    stripe_local_t *local = NULL;
-    call_frame_t *prev = NULL;
-    int32_t pending = 0;
-
-    if (!this || !frame || !frame->local || !cookie) {
-        gf_log("stripe", GF_LOG_DEBUG, "possible NULL deref");
-        return 0;
-    }
-
-    local = frame->local;
-    prev = cookie;
-
-    LOCK(&frame->lock);
-    {
-        pending = --local->call_count;
-
-        if (op_ret == -1) {
-            gf_log(this->name, GF_LOG_DEBUG, "%s returned %s", prev->this->name,
-                   strerror(op_errno));
-            local->op_errno = op_errno;
-            if ((prev->this == FIRST_CHILD(this)) || (op_errno != ENOENT))
-                local->failed = 1;
-        } else if (op_ret >= 0) {
-            local->op_ret = op_ret;
-        }
-    }
-    UNLOCK(&frame->lock);
-
-    if (pending)
-        return 0;
-
-    if (local->failed)
-        local->op_ret = -1;
-
-    STRIPE_STACK_UNWIND(flush, frame, local->op_ret, local->op_errno, NULL);
-    return 0;
-}
-
-/**
- * stripe_flush - forward flush to every child.
- *
- * Unwinds with ENOTCONN when the first child is flagged down, ENOMEM when
- * the per-call state cannot be allocated and EINVAL when an argument fails
- * validation. The caller's xdata is not forwarded. Replies are collected
- * by stripe_flush_cbk.
- */
-int32_t
-stripe_flush(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
-{
-    stripe_local_t *local = NULL;
-    stripe_private_t *priv = NULL;
-    xlator_list_t *trav = NULL;
-    /* Validation failures report EINVAL, as stripe_lk() does, rather than
-     * the bare literal 1. */
-    int32_t op_errno = EINVAL;
-
-    VALIDATE_OR_GOTO(frame, err);
-    VALIDATE_OR_GOTO(this, err);
-    VALIDATE_OR_GOTO(fd, err);
-    VALIDATE_OR_GOTO(fd->inode, err);
-
-    priv = this->private;
-    trav = this->children;
-
-    if (priv->first_child_down) {
-        op_errno = ENOTCONN;
-        goto err;
-    }
-    /* Initialization */
-    local = mem_get0(this->local_pool);
-    if (!local) {
-        op_errno = ENOMEM;
-        goto err;
-    }
-    local->op_ret = -1;
-    frame->local = local;
-    /* One reply is expected per child. */
-    local->call_count = priv->child_count;
-
-    while (trav) {
-        STACK_WIND(frame, stripe_flush_cbk, trav->xlator,
-                   trav->xlator->fops->flush, fd, NULL);
-        trav = trav->next;
-    }
-
-    return 0;
-err:
-    STRIPE_STACK_UNWIND(flush, frame, -1, op_errno, NULL);
-    return 0;
-}
-
-/**
- * stripe_fsync_cbk - record one child's reply to fsync and unwind after the
- * last outstanding reply.
- *
- * Block counts of successful replies are summed and the largest corrected
- * file size is kept; the first child's iatts are the base of the result.
- * A failure marks the call failed unless it is ENOENT from a child other
- * than the first.
- */
-int32_t
-stripe_fsync_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
-                 int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
-                 struct iatt *postbuf, dict_t *xdata)
-{
-    stripe_local_t *local = NULL;
-    call_frame_t *prev = NULL;
-    int32_t pending = 0;
-
-    if (!this || !frame || !frame->local || !cookie) {
-        gf_log("stripe", GF_LOG_DEBUG, "possible NULL deref");
-        return 0;
-    }
-
-    local = frame->local;
-    prev = cookie;
-
-    LOCK(&frame->lock);
-    {
-        pending = --local->call_count;
-
-        if (op_ret == -1) {
-            gf_log(this->name, GF_LOG_DEBUG, "%s returned %s", prev->this->name,
-                   strerror(op_errno));
-            local->op_errno = op_errno;
-            if ((prev->this == FIRST_CHILD(this)) || (op_errno != ENOENT))
-                local->failed = 1;
-        } else if (op_ret >= 0) {
-            local->op_ret = op_ret;
-
-            /* The first child's iatts are copied before size correction. */
-            if (prev->this == FIRST_CHILD(this)) {
-                local->pre_buf = *prebuf;
-                local->post_buf = *postbuf;
-            }
-
-            local->prebuf_blocks += prebuf->ia_blocks;
-            local->postbuf_blocks += postbuf->ia_blocks;
-
-            correct_file_size(prebuf, local->fctx, prev);
-            correct_file_size(postbuf, local->fctx, prev);
-
-            if (prebuf->ia_size > local->prebuf_size)
-                local->prebuf_size = prebuf->ia_size;
-
-            if (postbuf->ia_size > local->postbuf_size)
-                local->postbuf_size = postbuf->ia_size;
-        }
-    }
-    UNLOCK(&frame->lock);
-
-    if (pending)
-        return 0;
-
-    if (local->failed)
-        local->op_ret = -1;
-
-    /* On success, publish the aggregated size and block counts. */
-    if (local->op_ret != -1) {
-        local->pre_buf.ia_size = local->prebuf_size;
-        local->pre_buf.ia_blocks = local->prebuf_blocks;
-        local->post_buf.ia_size = local->postbuf_size;
-        local->post_buf.ia_blocks = local->postbuf_blocks;
-    }
-
-    STRIPE_STACK_UNWIND(fsync, frame, local->op_ret, local->op_errno,
-                        &local->pre_buf, &local->post_buf, NULL);
-    return 0;
-}
-
-/**
- * stripe_fsync - forward fsync to every child.
- *
- * Unwinds with ENOMEM when the per-call state cannot be allocated and with
- * EINVAL when an argument fails validation or the inode carries no stripe
- * context. The caller's xdata is not forwarded. Replies are collected by
- * stripe_fsync_cbk.
- */
-int32_t
-stripe_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags,
-             dict_t *xdata)
-{
-    stripe_local_t *local = NULL;
-    stripe_private_t *priv = NULL;
-    xlator_list_t *trav = NULL;
-    stripe_fd_ctx_t *fctx = NULL;
-    /* Validation failures report EINVAL, as stripe_lk() does, rather than
-     * the bare literal 1. */
-    int32_t op_errno = EINVAL;
-
-    VALIDATE_OR_GOTO(frame, err);
-    VALIDATE_OR_GOTO(this, err);
-    VALIDATE_OR_GOTO(fd, err);
-    VALIDATE_OR_GOTO(fd->inode, err);
-
-    priv = this->private;
-    trav = this->children;
-
-    /* Initialization */
-    local = mem_get0(this->local_pool);
-    if (!local) {
-        op_errno = ENOMEM;
-        goto err;
-    }
-
-    frame->local = local;
-
-    /* The callback needs the stripe context to correct file sizes. */
-    inode_ctx_get(fd->inode, this, (uint64_t *)&fctx);
-    if (!fctx) {
-        op_errno = EINVAL;
-        goto err;
-    }
-    local->fctx = fctx;
-    local->op_ret = -1;
-    local->call_count = priv->child_count;
-
-    while (trav) {
-        STACK_WIND(frame, stripe_fsync_cbk, trav->xlator,
-                   trav->xlator->fops->fsync, fd, flags, NULL);
-        trav = trav->next;
-    }
-
-    return 0;
-err:
-    STRIPE_STACK_UNWIND(fsync, frame, -1, op_errno, NULL, NULL, NULL);
-    return 0;
-}
-
-/**
- * stripe_fstat_cbk - record one child's reply to fstat and unwind after the
- * last outstanding reply.
- *
- * Block counts of successful replies are summed and the largest corrected
- * size is kept; the first child's iatt is the base of the result. A failure
- * marks the call failed unless it is ENOENT from a child other than the
- * first.
- */
-int32_t
-stripe_fstat_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
-                 int32_t op_ret, int32_t op_errno, struct iatt *buf,
-                 dict_t *xdata)
-{
-    stripe_local_t *local = NULL;
-    call_frame_t *prev = NULL;
-    int32_t pending = 0;
-
-    if (!this || !frame || !frame->local || !cookie) {
-        gf_log("stripe", GF_LOG_DEBUG, "possible NULL deref");
-        return 0;
-    }
-
-    local = frame->local;
-    prev = cookie;
-
-    LOCK(&frame->lock);
-    {
-        pending = --local->call_count;
-
-        if (op_ret == -1) {
-            gf_log(this->name, GF_LOG_DEBUG, "%s returned error %s",
-                   prev->this->name, strerror(op_errno));
-            local->op_errno = op_errno;
-            if ((prev->this == FIRST_CHILD(this)) || (op_errno != ENOENT))
-                local->failed = 1;
-        } else if (op_ret == 0) {
-            local->op_ret = 0;
-
-            /* The first child's iatt is copied before size correction. */
-            if (prev->this == FIRST_CHILD(this))
-                local->stbuf = *buf;
-
-            local->stbuf_blocks += buf->ia_blocks;
-
-            correct_file_size(buf, local->fctx, prev);
-
-            if (buf->ia_size > local->stbuf_size)
-                local->stbuf_size = buf->ia_size;
-        }
-    }
-    UNLOCK(&frame->lock);
-
-    if (pending)
-        return 0;
-
-    if (local->failed)
-        local->op_ret = -1;
-
-    /* On success, publish the aggregated size and block count. */
-    if (local->op_ret != -1) {
-        local->stbuf.ia_size = local->stbuf_size;
-        local->stbuf.ia_blocks = local->stbuf_blocks;
-    }
-
-    STRIPE_STACK_UNWIND(fstat, frame, local->op_ret, local->op_errno,
-                        &local->stbuf, NULL);
-    return 0;
-}
-
-/**
- * stripe_fstat - forward fstat to every child.
- *
- * Unwinds with ENOMEM when the per-call state cannot be allocated and with
- * EINVAL when an argument fails validation or a regular file has no stripe
- * context on its inode. The caller's xdata is not forwarded. Replies are
- * collected by stripe_fstat_cbk.
- */
-int32_t
-stripe_fstat(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
-{
-    stripe_local_t *local = NULL;
-    stripe_private_t *priv = NULL;
-    xlator_list_t *trav = NULL;
-    stripe_fd_ctx_t *fctx = NULL;
-    /* Validation failures report EINVAL, as stripe_lk() does, rather than
-     * the bare literal 1. */
-    int32_t op_errno = EINVAL;
-
-    VALIDATE_OR_GOTO(frame, err);
-    VALIDATE_OR_GOTO(this, err);
-    VALIDATE_OR_GOTO(fd, err);
-    VALIDATE_OR_GOTO(fd->inode, err);
-
-    priv = this->private;
-    trav = this->children;
-
-    /* Initialization */
-    local = mem_get0(this->local_pool);
-    if (!local) {
-        op_errno = ENOMEM;
-        goto err;
-    }
-    local->op_ret = -1;
-    frame->local = local;
-    local->call_count = priv->child_count;
-
-    /* Only regular files need the stripe context (used by the callback to
-     * correct sizes); other inode types proceed without one. */
-    if (IA_ISREG(fd->inode->ia_type)) {
-        inode_ctx_get(fd->inode, this, (uint64_t *)&fctx);
-        if (!fctx) {
-            /* Same errno the sibling fops report for a missing context. */
-            op_errno = EINVAL;
-            goto err;
-        }
-        local->fctx = fctx;
-    }
-
-    while (trav) {
-        STACK_WIND(frame, stripe_fstat_cbk, trav->xlator,
-                   trav->xlator->fops->fstat, fd, NULL);
-        trav = trav->next;
-    }
-
-    return 0;
-err:
-    STRIPE_STACK_UNWIND(fstat, frame, -1, op_errno, NULL, NULL);
-    return 0;
-}
-
-/**
- * stripe_ftruncate - truncate an open striped file at @offset.
- *
- * One ftruncate is wound to each of the fctx->stripe_count targets in
- * fctx->xl_array; replies are handled by stripe_truncate_cbk. With
- * coalescing enabled the per-child offset is derived from @offset through
- * coalesced_offset(); otherwise @offset is passed through unchanged.
- *
- * Unwinds with ENOMEM when the per-call state cannot be allocated and with
- * EINVAL when an argument fails validation or the stripe context is
- * missing or unusable.
- */
-int32_t
-stripe_ftruncate(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
-                 dict_t *xdata)
-{
-    stripe_local_t *local = NULL;
-    stripe_private_t *priv = NULL;
-    stripe_fd_ctx_t *fctx = NULL;
-    int i = 0;
-    int eof_idx = 0;
-    off_t dest_offset = 0;
-    off_t tmp_offset = 0;
-    int32_t op_errno = EINVAL;
-
-    VALIDATE_OR_GOTO(frame, err);
-    VALIDATE_OR_GOTO(this, err);
-    VALIDATE_OR_GOTO(fd, err);
-    VALIDATE_OR_GOTO(fd->inode, err);
-
-    priv = this->private;
-
-    /* Initialization */
-    local = mem_get0(this->local_pool);
-    if (!local) {
-        op_errno = ENOMEM;
-        goto err;
-    }
-    local->op_ret = -1;
-    frame->local = local;
-    local->call_count = priv->child_count;
-
-    inode_ctx_get(fd->inode, this, (uint64_t *)&fctx);
-    if (!fctx) {
-        gf_log(this->name, GF_LOG_ERROR, "no stripe context");
-        op_errno = EINVAL;
-        goto err;
-    }
-    if (!fctx->stripe_count) {
-        gf_log(this->name, GF_LOG_ERROR, "no stripe count");
-        op_errno = EINVAL;
-        goto err;
-    }
-    /* stripe_size is a divisor below; reject zero as readv/writev do. */
-    if (!fctx->stripe_size) {
-        gf_log(this->name, GF_LOG_DEBUG, "Wrong stripe size for the file");
-        op_errno = EINVAL;
-        goto err;
-    }
-
-    /* Validate every target before winding anything. Jumping to 'err'
-     * after some calls are already in flight would unwind the frame while
-     * replies for it are still pending. */
-    for (i = 0; i < fctx->stripe_count; i++) {
-        if (!fctx->xl_array[i]) {
-            gf_log(this->name, GF_LOG_ERROR, "no xlator at index %d", i);
-            op_errno = EINVAL;
-            goto err;
-        }
-    }
-
-    local->fctx = fctx;
-    /* Index of the stripe member that holds the byte at @offset. */
-    eof_idx = (offset / fctx->stripe_size) % fctx->stripe_count;
-
-    for (i = 0; i < fctx->stripe_count; i++) {
-        if (fctx->stripe_coalesce) {
-            /* Members before the EOF member get gf_roof() of the offset
-             * over a full stripe row, members after it get gf_floor(),
-             * and the EOF member gets the exact offset. */
-            if (i < eof_idx)
-                tmp_offset = gf_roof(offset,
-                                     fctx->stripe_size * fctx->stripe_count);
-            else if (i > eof_idx)
-                tmp_offset = gf_floor(offset,
-                                      fctx->stripe_size * fctx->stripe_count);
-            else
-                tmp_offset = offset;
-
-            dest_offset = coalesced_offset(tmp_offset, fctx->stripe_size,
-                                           fctx->stripe_count);
-        } else {
-            dest_offset = offset;
-        }
-
-        STACK_WIND(frame, stripe_truncate_cbk, fctx->xl_array[i],
-                   fctx->xl_array[i]->fops->ftruncate, fd, dest_offset, NULL);
-    }
-
-    return 0;
-err:
-    STRIPE_STACK_UNWIND(ftruncate, frame, -1, op_errno, NULL, NULL, NULL);
-    return 0;
-}
-
-/**
- * stripe_fsyncdir_cbk - record one child's reply to fsyncdir and unwind
- * after the last outstanding reply.
- *
- * A failure marks the call failed unless it is ENOENT from a child other
- * than the first.
- */
-int32_t
-stripe_fsyncdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
-                    int32_t op_ret, int32_t op_errno, dict_t *xdata)
-{
-    stripe_local_t *local = NULL;
-    call_frame_t *prev = NULL;
-    int32_t pending = 0;
-
-    if (!this || !frame || !frame->local || !cookie) {
-        gf_log("stripe", GF_LOG_DEBUG, "possible NULL deref");
-        return 0;
-    }
-
-    local = frame->local;
-    prev = cookie;
-
-    LOCK(&frame->lock);
-    {
-        pending = --local->call_count;
-
-        if (op_ret == -1) {
-            gf_log(this->name, GF_LOG_DEBUG, "%s returned %s", prev->this->name,
-                   strerror(op_errno));
-            local->op_errno = op_errno;
-            if ((prev->this == FIRST_CHILD(this)) || (op_errno != ENOENT))
-                local->failed = 1;
-        } else if (op_ret >= 0) {
-            local->op_ret = op_ret;
-        }
-    }
-    UNLOCK(&frame->lock);
-
-    if (pending)
-        return 0;
-
-    if (local->failed)
-        local->op_ret = -1;
-
-    STRIPE_STACK_UNWIND(fsyncdir, frame, local->op_ret, local->op_errno, NULL);
-    return 0;
-}
-
-/**
- * stripe_fsyncdir - forward fsyncdir to every child.
- *
- * Unwinds with ENOMEM when the per-call state cannot be allocated and with
- * EINVAL when an argument fails validation. The caller's xdata is not
- * forwarded. Replies are collected by stripe_fsyncdir_cbk.
- */
-int32_t
-stripe_fsyncdir(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags,
-                dict_t *xdata)
-{
-    stripe_local_t *local = NULL;
-    stripe_private_t *priv = NULL;
-    xlator_list_t *trav = NULL;
-    /* Validation failures report EINVAL, as stripe_lk() does, rather than
-     * the bare literal 1. */
-    int32_t op_errno = EINVAL;
-
-    VALIDATE_OR_GOTO(frame, err);
-    VALIDATE_OR_GOTO(this, err);
-    VALIDATE_OR_GOTO(fd, err);
-    VALIDATE_OR_GOTO(fd->inode, err);
-
-    priv = this->private;
-    trav = this->children;
-
-    /* Initialization */
-    local = mem_get0(this->local_pool);
-    if (!local) {
-        op_errno = ENOMEM;
-        goto err;
-    }
-    local->op_ret = -1;
-    frame->local = local;
-    /* One reply is expected per child. */
-    local->call_count = priv->child_count;
-
-    while (trav) {
-        STACK_WIND(frame, stripe_fsyncdir_cbk, trav->xlator,
-                   trav->xlator->fops->fsyncdir, fd, flags, NULL);
-        trav = trav->next;
-    }
-
-    return 0;
-err:
-    STRIPE_STACK_UNWIND(fsyncdir, frame, -1, op_errno, NULL);
-    return 0;
-}
-
-/**
- * stripe_readv_fstat_cbk - fstat reply handler for the short-read path of
- * readv (wound from stripe_readv_cbk's check_size step).
- *
- * Each successful reply contributes its corrected size to
- * local->stbuf_size. After the last reply, the saved readv replies are
- * stitched into a single iovec array; a reply shorter than requested is
- * padded with a zero-filled buffer when the file extends beyond the data
- * gathered so far. The readv is then unwound on this frame.
- */
-int32_t
-stripe_readv_fstat_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
-                       int32_t op_ret, int32_t op_errno, struct iatt *buf,
-                       dict_t *xdata)
-{
-    int32_t i = 0;
-    int32_t callcnt = 0;
-    int32_t count = 0;
-    stripe_local_t *local = NULL;
-    struct iovec *vec = NULL;
-    struct iatt tmp_stbuf = {
-        0,
-    };
-    struct iobref *tmp_iobref = NULL;
-    struct iobuf *iobuf = NULL;
-    call_frame_t *prev = NULL;
-
-    if (!this || !frame || !frame->local) {
-        gf_log("stripe", GF_LOG_DEBUG, "possible NULL deref");
-        goto out;
-    }
-
-    local = frame->local;
-    prev = cookie;
-
-    LOCK(&frame->lock);
-    {
-        callcnt = --local->call_count;
-        if (op_ret != -1) {
-            correct_file_size(buf, local->fctx, prev);
-            if (local->stbuf_size < buf->ia_size)
-                local->stbuf_size = buf->ia_size;
-        }
-    }
-    UNLOCK(&frame->lock);
-
-    if (!callcnt) {
-        /* From here on op_ret is the running byte total of the result. */
-        op_ret = 0;
-
-        /* Keep extra space for filling in '\0's */
-        vec = GF_CALLOC((local->count * 2), sizeof(struct iovec),
-                        gf_stripe_mt_iovec);
-        if (!vec) {
-            op_ret = -1;
-            /* Report the allocation failure instead of whatever errno the
-             * last fstat reply happened to carry. */
-            op_errno = ENOMEM;
-            goto done;
-        }
-
-        for (i = 0; i < local->wind_count; i++) {
-            if (local->replies[i].op_ret) {
-                memcpy((vec + count), local->replies[i].vector,
-                       (local->replies[i].count * sizeof(struct iovec)));
-                count += local->replies[i].count;
-                op_ret += local->replies[i].op_ret;
-            }
-            if ((local->replies[i].op_ret < local->replies[i].requested_size) &&
-                (local->stbuf_size > (local->offset + op_ret))) {
-                /* Fill in 0s here */
-                vec[count].iov_len = (local->replies[i].requested_size -
-                                      local->replies[i].op_ret);
-                iobuf = iobuf_get2(this->ctx->iobuf_pool, vec[count].iov_len);
-                if (!iobuf) {
-                    gf_log(this->name, GF_LOG_ERROR, "Out of memory.");
-                    op_ret = -1;
-                    op_errno = ENOMEM;
-                    goto done;
-                }
-                memset(iobuf->ptr, 0, vec[count].iov_len);
-                vec[count].iov_base = iobuf->ptr;
-
-                iobref_add(local->iobref, iobuf);
-                iobuf_unref(iobuf);
-
-                op_ret += vec[count].iov_len;
-                count++;
-            }
-        }
-
-        /* ENOENT signals EOF to the NFS-server */
-        /* NOTE(review): buf belongs to whichever fstat reply arrived last;
-         * confirm it is always valid here when that reply failed. */
-        if (op_ret != -1 && op_ret < local->readv_size &&
-            (local->offset + op_ret == buf->ia_size))
-            op_errno = ENOENT;
-
-        /* FIXME: notice that st_ino, and st_dev (gen) will be
-         * different than what inode will have. Make sure this doesn't
-         * cause any bugs at higher levels */
-        memcpy(&tmp_stbuf, &local->replies[0].stbuf, sizeof(struct iatt));
-        tmp_stbuf.ia_size = local->stbuf_size;
-
-    done:
-        /* Release the duplicated reply vectors on every path, including
-         * the error exits above that leave the loop early. Their entries
-         * were copied by value into vec, so the arrays themselves are no
-         * longer needed. */
-        for (i = 0; i < local->wind_count; i++)
-            GF_FREE(local->replies[i].vector);
-        GF_FREE(local->replies);
-        tmp_iobref = local->iobref;
-        STRIPE_STACK_UNWIND(readv, frame, op_ret, op_errno, vec, count,
-                            &tmp_stbuf, tmp_iobref, NULL);
-
-        iobref_unref(tmp_iobref);
-        GF_FREE(vec);
-    }
-out:
-    return 0;
-}
-
-/**
- * stripe_readv_cbk - get all the striped reads, and order it properly, send it
- * to above layer after putting it in a single vector.
- *
- * @frame is the per-chunk frame created by stripe_readv; its local carries
- * the chunk's slot (node_index), the requested size and a pointer to the
- * original frame (orig_frame). Results are stored in the original frame's
- * local (mlocal). The per-chunk frame is destroyed on every path that
- * reaches the 'out' label.
- */
-int32_t
-stripe_readv_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iovec *vector,
- int32_t count, struct iatt *stbuf, struct iobref *iobref,
- dict_t *xdata)
-{
- int32_t index = 0;
- int32_t callcnt = 0;
- int32_t final_count = 0;
- int32_t need_to_check_proper_size = 0;
- call_frame_t *mframe = NULL;
- stripe_local_t *mlocal = NULL;
- stripe_local_t *local = NULL;
- struct iovec *final_vec = NULL;
- struct iatt tmp_stbuf = {
- 0,
- };
- struct iatt *tmp_stbuf_p = NULL; // need it for a warning
- struct iobref *tmp_iobref = NULL;
- stripe_fd_ctx_t *fctx = NULL;
- call_frame_t *prev = NULL;
-
- /* Without a usable frame there is nothing to destroy: skip to 'end'. */
- if (!this || !frame || !frame->local || !cookie) {
- gf_log("stripe", GF_LOG_DEBUG, "possible NULL deref");
- goto end;
- }
-
- local = frame->local;
- index = local->node_index;
- prev = cookie;
- mframe = local->orig_frame;
- if (!mframe)
- goto out;
-
- mlocal = mframe->local;
- if (!mlocal)
- goto out;
-
- fctx = mlocal->fctx;
-
- /* Shared state lives on the original frame, so its lock is taken. */
- LOCK(&mframe->lock);
- {
- mlocal->replies[index].op_ret = op_ret;
- mlocal->replies[index].op_errno = op_errno;
- mlocal->replies[index].requested_size = local->readv_size;
- if (op_ret >= 0) {
- mlocal->replies[index].stbuf = *stbuf;
- mlocal->replies[index].count = count;
- /* NOTE(review): the result of iov_dup() is not checked here. */
- mlocal->replies[index].vector = iov_dup(vector, count);
-
- correct_file_size(stbuf, fctx, prev);
-
- /* NOTE(review): size and block totals are accumulated on this
- * chunk's own local, not on mlocal. */
- if (local->stbuf_size < stbuf->ia_size)
- local->stbuf_size = stbuf->ia_size;
- local->stbuf_blocks += stbuf->ia_blocks;
-
- if (!mlocal->iobref)
- mlocal->iobref = iobref_new();
- iobref_merge(mlocal->iobref, iobref);
- }
- /* Here call_count counts replies received so far (counts up). */
- callcnt = ++mlocal->call_count;
- }
- UNLOCK(&mframe->lock);
-
- /* Last reply: assemble the combined result. */
- if (callcnt == mlocal->wind_count) {
- op_ret = 0;
-
- for (index = 0; index < mlocal->wind_count; index++) {
- /* check whether each stripe returned
- * 'expected' number of bytes */
- if (mlocal->replies[index].op_ret == -1) {
- op_ret = -1;
- op_errno = mlocal->replies[index].op_errno;
- break;
- }
- /* TODO: handle the 'holes' within the read range
- properly */
- if (mlocal->replies[index].op_ret <
- mlocal->replies[index].requested_size) {
- need_to_check_proper_size = 1;
- }
-
- op_ret += mlocal->replies[index].op_ret;
- mlocal->count += mlocal->replies[index].count;
- }
- /* NOTE(review): on this error exit the duplicated reply vectors are
- * not freed before mlocal->replies is released at 'done'. */
- if (op_ret == -1)
- goto done;
- /* A short reply needs the real file size: an fstat is wound to each
- * stripe member and stripe_readv_fstat_cbk finishes the readv. */
- if (need_to_check_proper_size)
- goto check_size;
-
- final_vec = GF_CALLOC(mlocal->count, sizeof(struct iovec),
- gf_stripe_mt_iovec);
-
- /* NOTE(review): op_errno is left unchanged on this allocation
- * failure. */
- if (!final_vec) {
- op_ret = -1;
- goto done;
- }
-
- /* Concatenate the per-chunk vectors in slot order. */
- for (index = 0; index < mlocal->wind_count; index++) {
- memcpy((final_vec + final_count), mlocal->replies[index].vector,
- (mlocal->replies[index].count * sizeof(struct iovec)));
- final_count += mlocal->replies[index].count;
- GF_FREE(mlocal->replies[index].vector);
- }
-
- /* FIXME: notice that st_ino, and st_dev (gen) will be
- * different than what inode will have. Make sure this doesn't
- * cause any bugs at higher levels */
- memcpy(&tmp_stbuf, &mlocal->replies[0].stbuf, sizeof(struct iatt));
- tmp_stbuf.ia_size = local->stbuf_size;
- tmp_stbuf.ia_blocks = local->stbuf_blocks;
-
- done:
- /* */
- GF_FREE(mlocal->replies);
- tmp_iobref = mlocal->iobref;
- /* work around for nfs truncated read. Bug 3774 */
- /* NOTE(review): WIPE presumably clears tmp_stbuf, discarding the
- * values assigned just above - confirm against the macro. */
- tmp_stbuf_p = &tmp_stbuf;
- WIPE(tmp_stbuf_p);
- STRIPE_STACK_UNWIND(readv, mframe, op_ret, op_errno, final_vec,
- final_count, &tmp_stbuf, tmp_iobref, NULL);
-
- iobref_unref(tmp_iobref);
- GF_FREE(final_vec);
- }
-
- goto out;
-
-check_size:
- /* call_count is reused as a countdown for the fstat replies. */
- mlocal->call_count = fctx->stripe_count;
-
- for (index = 0; index < fctx->stripe_count; index++) {
- STACK_WIND(mframe, stripe_readv_fstat_cbk, (fctx->xl_array[index]),
- (fctx->xl_array[index])->fops->fstat, mlocal->fd, NULL);
- }
-
-out:
- STRIPE_STACK_DESTROY(frame);
-end:
- return 0;
-}
-
-/**
- * stripe_readv - split a read into per-stripe chunks and wind one readv per
- * chunk, each on its own copy of the frame.
- *
- * Replies are collected by stripe_readv_cbk, which stores each chunk in the
- * slot given by its node_index and unwinds the original frame.
- */
-int32_t
-stripe_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
- off_t offset, uint32_t flags, dict_t *xdata)
-{
- int32_t op_errno = EINVAL;
- int32_t idx = 0;
- int32_t index = 0;
- int32_t num_stripe = 0;
- int32_t off_index = 0;
- size_t frame_size = 0;
- off_t rounded_end = 0;
- uint64_t tmp_fctx = 0;
- uint64_t stripe_size = 0;
- off_t rounded_start = 0;
- off_t frame_offset = offset;
- off_t dest_offset = 0;
- stripe_local_t *local = NULL;
- call_frame_t *rframe = NULL;
- stripe_local_t *rlocal = NULL;
- stripe_fd_ctx_t *fctx = NULL;
-
- VALIDATE_OR_GOTO(frame, err);
- VALIDATE_OR_GOTO(this, err);
- VALIDATE_OR_GOTO(fd, err);
- VALIDATE_OR_GOTO(fd->inode, err);
-
- /* The stripe context is stored in the inode ctx as a 64-bit value. */
- inode_ctx_get(fd->inode, this, &tmp_fctx);
- if (!tmp_fctx) {
- op_errno = EBADFD;
- goto err;
- }
- fctx = (stripe_fd_ctx_t *)(long)tmp_fctx;
- stripe_size = fctx->stripe_size;
-
- STRIPE_VALIDATE_FCTX(fctx, err);
-
- /* stripe_size is used as a divisor below. */
- if (!stripe_size) {
- gf_log(this->name, GF_LOG_DEBUG, "Wrong stripe size for the file");
- goto err;
- }
- /* The file is stripe across the child nodes. Send the read request
- * to the child nodes appropriately after checking which region of
- * the file is in which child node. Always '0-<stripe_size>' part of
- * the file resides in the first child.
- */
- /* Number of stripe-sized chunks touched by [offset, offset + size). */
- rounded_start = gf_floor(offset, stripe_size);
- rounded_end = gf_roof(offset + size, stripe_size);
- num_stripe = (rounded_end - rounded_start) / stripe_size;
-
- local = mem_get0(this->local_pool);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
- frame->local = local;
-
- /* This is where all the vectors should be copied. */
- local->replies = GF_CALLOC(num_stripe, sizeof(struct stripe_replies),
- gf_stripe_mt_stripe_replies);
- if (!local->replies) {
- op_errno = ENOMEM;
- goto err;
- }
-
- /* Index of the stripe member that holds the first chunk. */
- off_index = (offset / stripe_size) % fctx->stripe_count;
- local->wind_count = num_stripe;
- local->readv_size = size;
- local->offset = offset;
- local->fd = fd_ref(fd);
- local->fctx = fctx;
-
- for (index = off_index; index < (num_stripe + off_index); index++) {
- /* NOTE(review): the result of copy_frame() is not checked before
- * rframe->local is assigned below. */
- rframe = copy_frame(frame);
- rlocal = mem_get0(this->local_pool);
- /* NOTE(review): jumping to err here unwinds the original frame even
- * if earlier iterations already wound requests on its behalf. */
- if (!rlocal) {
- op_errno = ENOMEM;
- goto err;
- }
-
- /* Read up to the next stripe boundary or the end of the request,
- * whichever comes first. */
- frame_size = min(gf_roof(frame_offset + 1, stripe_size),
- (offset + size)) -
- frame_offset;
-
- /* node_index is the chunk's slot in local->replies. */
- rlocal->node_index = index - off_index;
- rlocal->orig_frame = frame;
- rlocal->readv_size = frame_size;
- rframe->local = rlocal;
- /* Chunks are assigned to stripe members round-robin. */
- idx = (index % fctx->stripe_count);
-
- if (fctx->stripe_coalesce)
- dest_offset = coalesced_offset(frame_offset, stripe_size,
- fctx->stripe_count);
- else
- dest_offset = frame_offset;
-
- STACK_WIND(rframe, stripe_readv_cbk, fctx->xl_array[idx],
- fctx->xl_array[idx]->fops->readv, fd, frame_size,
- dest_offset, flags, xdata);
-
- frame_offset += frame_size;
- }
-
- return 0;
-err:
- if (rframe)
- STRIPE_STACK_DESTROY(rframe);
-
- STRIPE_STACK_UNWIND(readv, frame, -1, op_errno, NULL, 0, NULL, NULL, NULL);
- return 0;
-}
-
-/**
- * stripe_writev_cbk - record the reply for one chunk of a striped write.
- *
- * @frame is the per-chunk frame created by stripe_writev; its local names
- * the original frame (orig_frame) and the chunk's slot (node_index). All
- * results are stored in the original frame's local (mlocal). Once every
- * wound chunk has replied and the submitter has marked the request
- * complete (mlocal->unwind), the original frame is unwound. The per-chunk
- * frame is always destroyed before returning.
- */
-int32_t
-stripe_writev_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
-                  int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
-                  struct iatt *postbuf, dict_t *xdata)
-{
-    int32_t callcnt = 0;
-    stripe_local_t *local = NULL;
-    stripe_local_t *mlocal = NULL;
-    call_frame_t *prev = NULL;
-    call_frame_t *mframe = NULL;
-    struct stripe_replies *reply = NULL;
-    int32_t i = 0;
-
-    if (!this || !frame || !frame->local || !cookie) {
-        gf_log("stripe", GF_LOG_DEBUG, "possible NULL deref");
-        goto out;
-    }
-
-    prev = cookie;
-    local = frame->local;
-
-    /* Same defensive checks stripe_readv_cbk performs. */
-    mframe = local->orig_frame;
-    if (!mframe)
-        goto out;
-
-    mlocal = mframe->local;
-    if (!mlocal)
-        goto out;
-
-    /* mlocal is shared by the callbacks of every chunk, and each chunk runs
-     * on its own frame. Taking the per-chunk frame's lock would therefore
-     * serialize nothing; the original frame's lock must be held. */
-    LOCK(&mframe->lock);
-    {
-        /* Here call_count counts replies received so far (counts up). */
-        callcnt = ++mlocal->call_count;
-
-        mlocal->replies[local->node_index].op_ret = op_ret;
-        mlocal->replies[local->node_index].op_errno = op_errno;
-
-        if (op_ret >= 0) {
-            mlocal->post_buf = *postbuf;
-            mlocal->pre_buf = *prebuf;
-
-            mlocal->prebuf_blocks += prebuf->ia_blocks;
-            mlocal->postbuf_blocks += postbuf->ia_blocks;
-
-            correct_file_size(prebuf, mlocal->fctx, prev);
-            correct_file_size(postbuf, mlocal->fctx, prev);
-
-            if (mlocal->prebuf_size < prebuf->ia_size)
-                mlocal->prebuf_size = prebuf->ia_size;
-            if (mlocal->postbuf_size < postbuf->ia_size)
-                mlocal->postbuf_size = postbuf->ia_size;
-        }
-    }
-    UNLOCK(&mframe->lock);
-
-    if ((callcnt == mlocal->wind_count) && mlocal->unwind) {
-        mlocal->pre_buf.ia_size = mlocal->prebuf_size;
-        mlocal->pre_buf.ia_blocks = mlocal->prebuf_blocks;
-        mlocal->post_buf.ia_size = mlocal->postbuf_size;
-        mlocal->post_buf.ia_blocks = mlocal->postbuf_blocks;
-
-        /*
-         * Only return the number of consecutively written bytes up until
-         * the first error. Only return an error if it occurs first.
-         *
-         * When a short write occurs, the application should retry at the
-         * appropriate offset, at which point we'll potentially pass back
-         * the error.
-         */
-        for (i = 0, reply = mlocal->replies; i < mlocal->wind_count;
-             i++, reply++) {
-            if (reply->op_ret == -1) {
-                gf_log(this->name, GF_LOG_DEBUG, "reply %d returned error %s",
-                       i, strerror(reply->op_errno));
-                /* Nothing written before this chunk: report the error. */
-                if (!mlocal->op_ret) {
-                    mlocal->op_ret = -1;
-                    mlocal->op_errno = reply->op_errno;
-                }
-                break;
-            }
-
-            mlocal->op_ret += reply->op_ret;
-
-            /* A short write ends the run of consecutive bytes. */
-            if (reply->op_ret < reply->requested_size)
-                break;
-        }
-
-        GF_FREE(mlocal->replies);
-
-        STRIPE_STACK_UNWIND(writev, mframe, mlocal->op_ret, mlocal->op_errno,
-                            &mlocal->pre_buf, &mlocal->post_buf, NULL);
-    }
-out:
-    if (frame)
-        STRIPE_STACK_DESTROY(frame);
-    return 0;
-}
-
-/**
- * stripe_writev - split a write into per-stripe chunks and wind one writev
- * per chunk, each on its own copy of the frame.
- *
- * Replies are collected by stripe_writev_cbk, which unwinds the original
- * frame once all wound chunks have replied and local->unwind is set.
- */
-int32_t
-stripe_writev(call_frame_t *frame, xlator_t *this, fd_t *fd,
- struct iovec *vector, int32_t count, off_t offset, uint32_t flags,
- struct iobref *iobref, dict_t *xdata)
-{
- struct iovec *tmp_vec = NULL;
- stripe_local_t *local = NULL;
- stripe_fd_ctx_t *fctx = NULL;
- /* NOTE(review): sibling fops such as stripe_readv start from EINVAL; the
- * literal 1 is what validation failures report here. */
- int32_t op_errno = 1;
- int32_t idx = 0;
- int32_t total_size = 0;
- int32_t offset_offset = 0;
- int32_t remaining_size = 0;
- int32_t tmp_count = count;
- off_t fill_size = 0;
- uint64_t stripe_size = 0;
- uint64_t tmp_fctx = 0;
- off_t dest_offset = 0;
- off_t rounded_start = 0;
- off_t rounded_end = 0;
- int32_t total_chunks = 0;
- call_frame_t *wframe = NULL;
- stripe_local_t *wlocal = NULL;
-
- VALIDATE_OR_GOTO(frame, err);
- VALIDATE_OR_GOTO(this, err);
- VALIDATE_OR_GOTO(fd, err);
- VALIDATE_OR_GOTO(fd->inode, err);
-
- /* The stripe context is stored in the inode ctx as a 64-bit value. */
- inode_ctx_get(fd->inode, this, &tmp_fctx);
- if (!tmp_fctx) {
- op_errno = EINVAL;
- goto err;
- }
- fctx = (stripe_fd_ctx_t *)(long)tmp_fctx;
- stripe_size = fctx->stripe_size;
-
- STRIPE_VALIDATE_FCTX(fctx, err);
-
- /* File has to be stripped across the child nodes */
- total_size = iov_length(vector, count);
- remaining_size = total_size;
-
- local = mem_get0(this->local_pool);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
- frame->local = local;
- local->stripe_size = stripe_size;
- local->fctx = fctx;
-
- /* stripe_size is used as a divisor below. */
- if (!stripe_size) {
- gf_log(this->name, GF_LOG_DEBUG, "Wrong stripe size for the file");
- op_errno = EINVAL;
- goto err;
- }
-
- /* One reply slot per stripe-sized chunk touched by the write. */
- rounded_start = gf_floor(offset, stripe_size);
- rounded_end = gf_roof(offset + total_size, stripe_size);
- total_chunks = (rounded_end - rounded_start) / stripe_size;
- local->replies = GF_CALLOC(total_chunks, sizeof(struct stripe_replies),
- gf_stripe_mt_stripe_replies);
- if (!local->replies) {
- op_errno = ENOMEM;
- goto err;
- }
-
- /* total_chunks is reused from here on as the index of the next chunk. */
- total_chunks = 0;
- while (1) {
- /* NOTE(review): the result of copy_frame() is not checked before
- * wframe->local is assigned below. */
- wframe = copy_frame(frame);
- wlocal = mem_get0(this->local_pool);
- /* NOTE(review): jumping to err from inside this loop unwinds the
- * original frame even if earlier chunks were already wound. */
- if (!wlocal) {
- op_errno = ENOMEM;
- goto err;
- }
- wlocal->orig_frame = frame;
- wframe->local = wlocal;
-
- /* Send striped chunk of the vector to child
- nodes appropriately. */
- idx = (((offset + offset_offset) / local->stripe_size) %
- fctx->stripe_count);
-
- /* Bytes left in the current stripe unit, capped at what remains. */
- fill_size = (local->stripe_size -
- ((offset + offset_offset) % local->stripe_size));
- if (fill_size > remaining_size)
- fill_size = remaining_size;
-
- remaining_size -= fill_size;
-
- /* First call only counts the entries needed; second call fills them. */
- tmp_count = iov_subset(vector, count, offset_offset,
- offset_offset + fill_size, NULL);
- tmp_vec = GF_CALLOC(tmp_count, sizeof(struct iovec),
- gf_stripe_mt_iovec);
- if (!tmp_vec) {
- op_errno = ENOMEM;
- goto err;
- }
- tmp_count = iov_subset(vector, count, offset_offset,
- offset_offset + fill_size, tmp_vec);
-
- /* unwind is set before the final chunk is wound so that the callback
- * of the last reply can complete the request. */
- local->wind_count++;
- if (remaining_size == 0)
- local->unwind = 1;
-
- /*
- * Store off the request index (with respect to the chunk of the
- * initial offset) and the size of the request. This is required
- * in the callback to calculate an appropriate return value in
- * the event of a write failure in one or more requests.
- */
- wlocal->node_index = total_chunks;
- local->replies[total_chunks].requested_size = fill_size;
-
- dest_offset = offset + offset_offset;
- if (fctx->stripe_coalesce)
- dest_offset = coalesced_offset(dest_offset, local->stripe_size,
- fctx->stripe_count);
-
- STACK_WIND(wframe, stripe_writev_cbk, fctx->xl_array[idx],
- fctx->xl_array[idx]->fops->writev, fd, tmp_vec, tmp_count,
- dest_offset, flags, iobref, xdata);
-
- GF_FREE(tmp_vec);
- offset_offset += fill_size;
- total_chunks++;
- if (remaining_size == 0)
- break;
- }
-
- return 0;
-err:
- if (wframe)
- STRIPE_STACK_DESTROY(wframe);
-
- STRIPE_STACK_UNWIND(writev, frame, -1, op_errno, NULL, NULL, NULL);
- return 0;
-}
-
-int32_t
-stripe_fallocate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf, dict_t *xdata)
-{
- int32_t callcnt = 0;
- stripe_local_t *local = NULL;
- stripe_local_t *mlocal = NULL;
- call_frame_t *prev = NULL;
- call_frame_t *mframe = NULL;
-
- if (!this || !frame || !frame->local || !cookie) {
- gf_log("stripe", GF_LOG_DEBUG, "possible NULL deref");
- goto out;
- }
-
- prev = cookie;
- local = frame->local;
- mframe = local->orig_frame;
- mlocal = mframe->local;
-
- LOCK(&frame->lock);
- {
- callcnt = ++mlocal->call_count;
-
- if (op_ret == 0) {
- mlocal->post_buf = *postbuf;
- mlocal->pre_buf = *prebuf;
-
- mlocal->prebuf_blocks += prebuf->ia_blocks;
- mlocal->postbuf_blocks += postbuf->ia_blocks;
-
- correct_file_size(prebuf, mlocal->fctx, prev);
- correct_file_size(postbuf, mlocal->fctx, prev);
-
- if (mlocal->prebuf_size < prebuf->ia_size)
- mlocal->prebuf_size = prebuf->ia_size;
- if (mlocal->postbuf_size < postbuf->ia_size)
- mlocal->postbuf_size = postbuf->ia_size;
- }
-
- /* return the first failure */
- if (mlocal->op_ret == 0) {
- mlocal->op_ret = op_ret;
- mlocal->op_errno = op_errno;
- }
- }
- UNLOCK(&frame->lock);
-
- if ((callcnt == mlocal->wind_count) && mlocal->unwind) {
- mlocal->pre_buf.ia_size = mlocal->prebuf_size;
- mlocal->pre_buf.ia_blocks = mlocal->prebuf_blocks;
- mlocal->post_buf.ia_size = mlocal->postbuf_size;
- mlocal->post_buf.ia_blocks = mlocal->postbuf_blocks;
-
- STRIPE_STACK_UNWIND(fallocate, mframe, mlocal->op_ret, mlocal->op_errno,
- &mlocal->pre_buf, &mlocal->post_buf, NULL);
- }
-out:
- if (frame)
- STRIPE_STACK_DESTROY(frame);
- return 0;
-}
-
-int32_t
-stripe_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t mode,
- off_t offset, size_t len, dict_t *xdata)
-{
- stripe_local_t *local = NULL;
- stripe_fd_ctx_t *fctx = NULL;
- int32_t op_errno = 1;
- int32_t idx = 0;
- int32_t offset_offset = 0;
- int32_t remaining_size = 0;
- off_t fill_size = 0;
- uint64_t stripe_size = 0;
- uint64_t tmp_fctx = 0;
- off_t dest_offset = 0;
- call_frame_t *fframe = NULL;
- stripe_local_t *flocal = NULL;
-
- VALIDATE_OR_GOTO(frame, err);
- VALIDATE_OR_GOTO(this, err);
- VALIDATE_OR_GOTO(fd, err);
- VALIDATE_OR_GOTO(fd->inode, err);
-
- inode_ctx_get(fd->inode, this, &tmp_fctx);
- if (!tmp_fctx) {
- op_errno = EINVAL;
- goto err;
- }
- fctx = (stripe_fd_ctx_t *)(long)tmp_fctx;
- stripe_size = fctx->stripe_size;
-
- STRIPE_VALIDATE_FCTX(fctx, err);
-
- remaining_size = len;
-
- local = mem_get0(this->local_pool);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
- frame->local = local;
- local->stripe_size = stripe_size;
- local->fctx = fctx;
-
- if (!stripe_size) {
- gf_log(this->name, GF_LOG_DEBUG, "Wrong stripe size for the file");
- op_errno = EINVAL;
- goto err;
- }
-
- while (1) {
- fframe = copy_frame(frame);
- flocal = mem_get0(this->local_pool);
- if (!flocal) {
- op_errno = ENOMEM;
- goto err;
- }
- flocal->orig_frame = frame;
- fframe->local = flocal;
-
- /* send fallocate request to the associated child node */
- idx = (((offset + offset_offset) / local->stripe_size) %
- fctx->stripe_count);
-
- fill_size = (local->stripe_size -
- ((offset + offset_offset) % local->stripe_size));
- if (fill_size > remaining_size)
- fill_size = remaining_size;
-
- remaining_size -= fill_size;
-
- local->wind_count++;
- if (remaining_size == 0)
- local->unwind = 1;
-
- dest_offset = offset + offset_offset;
- if (fctx->stripe_coalesce)
- dest_offset = coalesced_offset(dest_offset, local->stripe_size,
- fctx->stripe_count);
-
- /*
- * TODO: Create a separate handler for coalesce mode that sends a
- * single fallocate per-child (since the ranges are linear).
- */
- STACK_WIND(fframe, stripe_fallocate_cbk, fctx->xl_array[idx],
- fctx->xl_array[idx]->fops->fallocate, fd, mode, dest_offset,
- fill_size, xdata);
-
- offset_offset += fill_size;
- if (remaining_size == 0)
- break;
- }
-
- return 0;
-err:
- if (fframe)
- STRIPE_STACK_DESTROY(fframe);
-
- STRIPE_STACK_UNWIND(fallocate, frame, -1, op_errno, NULL, NULL, NULL);
- return 0;
-}
-
-int32_t
-stripe_discard_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf, dict_t *xdata)
-{
- int32_t callcnt = 0;
- stripe_local_t *local = NULL;
- stripe_local_t *mlocal = NULL;
- call_frame_t *prev = NULL;
- call_frame_t *mframe = NULL;
-
- if (!this || !frame || !frame->local || !cookie) {
- gf_log("stripe", GF_LOG_DEBUG, "possible NULL deref");
- goto out;
- }
-
- prev = cookie;
- local = frame->local;
- mframe = local->orig_frame;
- mlocal = mframe->local;
-
- LOCK(&frame->lock);
- {
- callcnt = ++mlocal->call_count;
-
- if (op_ret == 0) {
- mlocal->post_buf = *postbuf;
- mlocal->pre_buf = *prebuf;
-
- mlocal->prebuf_blocks += prebuf->ia_blocks;
- mlocal->postbuf_blocks += postbuf->ia_blocks;
-
- correct_file_size(prebuf, mlocal->fctx, prev);
- correct_file_size(postbuf, mlocal->fctx, prev);
-
- if (mlocal->prebuf_size < prebuf->ia_size)
- mlocal->prebuf_size = prebuf->ia_size;
- if (mlocal->postbuf_size < postbuf->ia_size)
- mlocal->postbuf_size = postbuf->ia_size;
- }
-
- /* return the first failure */
- if (mlocal->op_ret == 0) {
- mlocal->op_ret = op_ret;
- mlocal->op_errno = op_errno;
- }
- }
- UNLOCK(&frame->lock);
-
- if ((callcnt == mlocal->wind_count) && mlocal->unwind) {
- mlocal->pre_buf.ia_size = mlocal->prebuf_size;
- mlocal->pre_buf.ia_blocks = mlocal->prebuf_blocks;
- mlocal->post_buf.ia_size = mlocal->postbuf_size;
- mlocal->post_buf.ia_blocks = mlocal->postbuf_blocks;
-
- STRIPE_STACK_UNWIND(discard, mframe, mlocal->op_ret, mlocal->op_errno,
- &mlocal->pre_buf, &mlocal->post_buf, NULL);
- }
-out:
- if (frame)
- STRIPE_STACK_DESTROY(frame);
-
- return 0;
-}
-
-int32_t
-stripe_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
- size_t len, dict_t *xdata)
-{
- stripe_local_t *local = NULL;
- stripe_fd_ctx_t *fctx = NULL;
- int32_t op_errno = 1;
- int32_t idx = 0;
- int32_t offset_offset = 0;
- int32_t remaining_size = 0;
- off_t fill_size = 0;
- uint64_t stripe_size = 0;
- uint64_t tmp_fctx = 0;
- off_t dest_offset = 0;
- call_frame_t *fframe = NULL;
- stripe_local_t *flocal = NULL;
-
- VALIDATE_OR_GOTO(frame, err);
- VALIDATE_OR_GOTO(this, err);
- VALIDATE_OR_GOTO(fd, err);
- VALIDATE_OR_GOTO(fd->inode, err);
-
- inode_ctx_get(fd->inode, this, &tmp_fctx);
- if (!tmp_fctx) {
- op_errno = EINVAL;
- goto err;
- }
- fctx = (stripe_fd_ctx_t *)(long)tmp_fctx;
- stripe_size = fctx->stripe_size;
-
- STRIPE_VALIDATE_FCTX(fctx, err);
-
- remaining_size = len;
-
- local = mem_get0(this->local_pool);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
- frame->local = local;
- local->stripe_size = stripe_size;
- local->fctx = fctx;
-
- if (!stripe_size) {
- gf_log(this->name, GF_LOG_DEBUG, "Wrong stripe size for the file");
- op_errno = EINVAL;
- goto err;
- }
-
- while (1) {
- fframe = copy_frame(frame);
- flocal = mem_get0(this->local_pool);
- if (!flocal) {
- op_errno = ENOMEM;
- goto err;
- }
- flocal->orig_frame = frame;
- fframe->local = flocal;
-
- /* send discard request to the associated child node */
- idx = (((offset + offset_offset) / local->stripe_size) %
- fctx->stripe_count);
-
- fill_size = (local->stripe_size -
- ((offset + offset_offset) % local->stripe_size));
- if (fill_size > remaining_size)
- fill_size = remaining_size;
-
- remaining_size -= fill_size;
-
- local->wind_count++;
- if (remaining_size == 0)
- local->unwind = 1;
-
- dest_offset = offset + offset_offset;
- if (fctx->stripe_coalesce)
- dest_offset = coalesced_offset(dest_offset, local->stripe_size,
- fctx->stripe_count);
-
- /*
- * TODO: Create a separate handler for coalesce mode that sends a
- * single discard per-child (since the ranges are linear).
- */
- STACK_WIND(fframe, stripe_discard_cbk, fctx->xl_array[idx],
- fctx->xl_array[idx]->fops->discard, fd, dest_offset,
- fill_size, xdata);
-
- offset_offset += fill_size;
- if (remaining_size == 0)
- break;
- }
-
- return 0;
-err:
- if (fframe)
- STRIPE_STACK_DESTROY(fframe);
-
- STRIPE_STACK_UNWIND(discard, frame, -1, op_errno, NULL, NULL, NULL);
- return 0;
-}
-
-int32_t
-stripe_zerofill_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf, dict_t *xdata)
-{
- int32_t callcnt = 0;
- stripe_local_t *local = NULL;
- stripe_local_t *mlocal = NULL;
- call_frame_t *prev = NULL;
- call_frame_t *mframe = NULL;
-
- GF_ASSERT(frame);
-
- if (!this || !frame->local || !cookie) {
- gf_log("stripe", GF_LOG_DEBUG, "possible NULL deref");
- goto out;
- }
-
- prev = cookie;
- local = frame->local;
- mframe = local->orig_frame;
- mlocal = mframe->local;
-
- LOCK(&frame->lock);
- {
- callcnt = ++mlocal->call_count;
-
- if (op_ret == 0) {
- mlocal->post_buf = *postbuf;
- mlocal->pre_buf = *prebuf;
-
- mlocal->prebuf_blocks += prebuf->ia_blocks;
- mlocal->postbuf_blocks += postbuf->ia_blocks;
-
- correct_file_size(prebuf, mlocal->fctx, prev);
- correct_file_size(postbuf, mlocal->fctx, prev);
-
- if (mlocal->prebuf_size < prebuf->ia_size)
- mlocal->prebuf_size = prebuf->ia_size;
- if (mlocal->postbuf_size < postbuf->ia_size)
- mlocal->postbuf_size = postbuf->ia_size;
- }
-
- /* return the first failure */
- if (mlocal->op_ret == 0) {
- mlocal->op_ret = op_ret;
- mlocal->op_errno = op_errno;
- }
- }
- UNLOCK(&frame->lock);
-
- if ((callcnt == mlocal->wind_count) && mlocal->unwind) {
- mlocal->pre_buf.ia_size = mlocal->prebuf_size;
- mlocal->pre_buf.ia_blocks = mlocal->prebuf_blocks;
- mlocal->post_buf.ia_size = mlocal->postbuf_size;
- mlocal->post_buf.ia_blocks = mlocal->postbuf_blocks;
-
- STRIPE_STACK_UNWIND(zerofill, mframe, mlocal->op_ret, mlocal->op_errno,
- &mlocal->pre_buf, &mlocal->post_buf, NULL);
- }
-out:
- STRIPE_STACK_DESTROY(frame);
- return 0;
-}
-
-int32_t
-stripe_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
- off_t len, dict_t *xdata)
-{
- stripe_local_t *local = NULL;
- stripe_fd_ctx_t *fctx = NULL;
- int32_t op_errno = 1;
- int32_t idx = 0;
- int32_t offset_offset = 0;
- int32_t remaining_size = 0;
- off_t fill_size = 0;
- uint64_t stripe_size = 0;
- uint64_t tmp_fctx = 0;
- off_t dest_offset = 0;
- call_frame_t *fframe = NULL;
- stripe_local_t *flocal = NULL;
-
- VALIDATE_OR_GOTO(frame, err);
- VALIDATE_OR_GOTO(this, err);
- VALIDATE_OR_GOTO(fd, err);
- VALIDATE_OR_GOTO(fd->inode, err);
-
- inode_ctx_get(fd->inode, this, &tmp_fctx);
- if (!tmp_fctx) {
- op_errno = EINVAL;
- goto err;
- }
- fctx = (stripe_fd_ctx_t *)(long)tmp_fctx;
- stripe_size = fctx->stripe_size;
-
- STRIPE_VALIDATE_FCTX(fctx, err);
-
- remaining_size = len;
-
- local = mem_get0(this->local_pool);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
- frame->local = local;
- local->stripe_size = stripe_size;
- local->fctx = fctx;
-
- if (!stripe_size) {
- gf_log(this->name, GF_LOG_DEBUG, "Wrong stripe size for the file");
- op_errno = EINVAL;
- goto err;
- }
-
- while (1) {
- fframe = copy_frame(frame);
- flocal = mem_get0(this->local_pool);
- if (!flocal) {
- op_errno = ENOMEM;
- goto err;
- }
- flocal->orig_frame = frame;
- fframe->local = flocal;
-
- idx = (((offset + offset_offset) / local->stripe_size) %
- fctx->stripe_count);
-
- fill_size = (local->stripe_size -
- ((offset + offset_offset) % local->stripe_size));
- if (fill_size > remaining_size)
- fill_size = remaining_size;
-
- remaining_size -= fill_size;
-
- local->wind_count++;
- if (remaining_size == 0)
- local->unwind = 1;
-
- dest_offset = offset + offset_offset;
- if (fctx->stripe_coalesce)
- dest_offset = coalesced_offset(dest_offset, local->stripe_size,
- fctx->stripe_count);
-
- STACK_WIND(fframe, stripe_zerofill_cbk, fctx->xl_array[idx],
- fctx->xl_array[idx]->fops->zerofill, fd, dest_offset,
- fill_size, xdata);
- offset_offset += fill_size;
- if (remaining_size == 0)
- break;
- }
-
- return 0;
-err:
- if (fframe)
- STRIPE_STACK_DESTROY(fframe);
-
- STRIPE_STACK_UNWIND(zerofill, frame, -1, op_errno, NULL, NULL, NULL);
- return 0;
-}
-
-int32_t
-stripe_seek(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
- gf_seek_what_t what, dict_t *xdata)
-{
- /* TBD */
- gf_log(this->name, GF_LOG_INFO, "seek called on %s.",
- uuid_utoa(fd->inode->gfid));
- STRIPE_STACK_UNWIND(seek, frame, -1, ENOTSUP, 0, NULL);
- return 0;
-}
-
-int32_t
-stripe_release(xlator_t *this, fd_t *fd)
-{
- return 0;
-}
-
-int
-stripe_forget(xlator_t *this, inode_t *inode)
-{
- uint64_t tmp_fctx = 0;
- stripe_fd_ctx_t *fctx = NULL;
-
- VALIDATE_OR_GOTO(this, err);
- VALIDATE_OR_GOTO(inode, err);
-
- (void)inode_ctx_del(inode, this, &tmp_fctx);
- if (!tmp_fctx) {
- goto err;
- }
-
- fctx = (stripe_fd_ctx_t *)(long)tmp_fctx;
-
- if (!fctx->static_array)
- GF_FREE(fctx->xl_array);
-
- GF_FREE(fctx);
-err:
- return 0;
-}
-
-int32_t
-notify(xlator_t *this, int32_t event, void *data, ...)
-{
- stripe_private_t *priv = NULL;
- int down_client = 0;
- int i = 0;
- gf_boolean_t heard_from_all_children = _gf_false;
-
- if (!this)
- return 0;
-
- priv = this->private;
- if (!priv)
- return 0;
-
- switch (event) {
- case GF_EVENT_CHILD_UP: {
- /* get an index number to set */
- for (i = 0; i < priv->child_count; i++) {
- if (data == priv->xl_array[i])
- break;
- }
-
- if (priv->child_count == i) {
- gf_log(this->name, GF_LOG_ERROR,
- "got GF_EVENT_CHILD_UP bad subvolume %s",
- data ? ((xlator_t *)data)->name : NULL);
- break;
- }
-
- LOCK(&priv->lock);
- {
- if (data == FIRST_CHILD(this))
- priv->first_child_down = 0;
- priv->last_event[i] = event;
- }
- UNLOCK(&priv->lock);
- } break;
- case GF_EVENT_CHILD_CONNECTING: {
- // 'CONNECTING' doesn't ensure its CHILD_UP, so do nothing
- goto out;
- }
- case GF_EVENT_CHILD_DOWN: {
- /* get an index number to set */
- for (i = 0; i < priv->child_count; i++) {
- if (data == priv->xl_array[i])
- break;
- }
-
- if (priv->child_count == i) {
- gf_log(this->name, GF_LOG_ERROR,
- "got GF_EVENT_CHILD_DOWN bad subvolume %s",
- data ? ((xlator_t *)data)->name : NULL);
- break;
- }
-
- LOCK(&priv->lock);
- {
- if (data == FIRST_CHILD(this))
- priv->first_child_down = 1;
- priv->last_event[i] = event;
- }
- UNLOCK(&priv->lock);
- } break;
-
- default: {
- /* */
- default_notify(this, event, data);
- goto out;
- } break;
- }
-
- // Consider child as down if it's last_event is not CHILD_UP
- for (i = 0, down_client = 0; i < priv->child_count; i++)
- if (priv->last_event[i] != GF_EVENT_CHILD_UP)
- down_client++;
-
- LOCK(&priv->lock);
- {
- priv->nodes_down = down_client;
- }
- UNLOCK(&priv->lock);
-
- heard_from_all_children = _gf_true;
- for (i = 0; i < priv->child_count; i++)
- if (!priv->last_event[i])
- heard_from_all_children = _gf_false;
-
- if (heard_from_all_children)
- default_notify(this, event, data);
-out:
- return 0;
-}
-
-int
-stripe_setxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, dict_t *xdata)
-{
- int ret = -1;
- int call_cnt = 0;
- stripe_local_t *local = NULL;
-
- if (!frame || !frame->local || !this) {
- gf_log("", GF_LOG_ERROR, "Possible NULL deref");
- return ret;
- }
-
- local = frame->local;
-
- LOCK(&frame->lock);
- {
- call_cnt = --local->wind_count;
-
- /**
- * We overwrite ->op_* values here for subsequent failure
- * conditions, hence we propagate the last errno down the
- * stack.
- */
- if (op_ret < 0) {
- local->op_ret = op_ret;
- local->op_errno = op_errno;
- goto unlock;
- }
- }
-
-unlock:
- UNLOCK(&frame->lock);
-
- if (!call_cnt) {
- STRIPE_STACK_UNWIND(setxattr, frame, local->op_ret, local->op_errno,
- xdata);
- }
-
- return 0;
-}
-
-#ifdef HAVE_BD_XLATOR
-int
-stripe_is_bd(dict_t *this, char *key, data_t *value, void *data)
-{
- gf_boolean_t *is_bd = data;
-
- if (data == NULL)
- return 0;
-
- if (XATTR_IS_BD(key))
- *is_bd = _gf_true;
-
- return 0;
-}
-
-static gf_boolean_t
-stripe_setxattr_is_bd(dict_t *dict)
-{
- gf_boolean_t is_bd = _gf_false;
-
- if (dict == NULL)
- goto out;
-
- dict_foreach(dict, stripe_is_bd, &is_bd);
-out:
- return is_bd;
-}
-#else
-#define stripe_setxattr_is_bd(dict) _gf_false
-#endif
-
-int
-stripe_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict,
- int flags, dict_t *xdata)
-{
- int32_t op_errno = EINVAL;
- xlator_list_t *trav = NULL;
- stripe_private_t *priv = NULL;
- stripe_local_t *local = NULL;
- int i = 0;
- gf_boolean_t is_bd = _gf_false;
-
- VALIDATE_OR_GOTO(frame, err);
- VALIDATE_OR_GOTO(this, err);
- VALIDATE_OR_GOTO(loc, err);
- VALIDATE_OR_GOTO(loc->inode, err);
-
- GF_IF_INTERNAL_XATTR_GOTO("trusted.*stripe*", dict, op_errno, err);
-
- priv = this->private;
- trav = this->children;
-
- local = mem_get0(this->local_pool);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
-
- frame->local = local;
- local->wind_count = priv->child_count;
- local->op_ret = local->op_errno = 0;
-
- is_bd = stripe_setxattr_is_bd(dict);
-
- /**
- * Set xattrs for directories on all subvolumes. Additionally
- * this power is only given to a special client. Bd xlator
- * also needs xattrs for regular files (ie LVs)
- */
- if (((frame->root->pid == GF_CLIENT_PID_GSYNCD) &&
- IA_ISDIR(loc->inode->ia_type)) ||
- is_bd) {
- for (i = 0; i < priv->child_count; i++, trav = trav->next) {
- STACK_WIND(frame, stripe_setxattr_cbk, trav->xlator,
- trav->xlator->fops->setxattr, loc, dict, flags, xdata);
- }
- } else {
- local->wind_count = 1;
- STACK_WIND(frame, stripe_setxattr_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->setxattr, loc, dict, flags, xdata);
- }
-
- return 0;
-err:
- STRIPE_STACK_UNWIND(setxattr, frame, -1, op_errno, NULL);
- return 0;
-}
-
-int
-stripe_fsetxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, dict_t *xdata)
-{
- STRIPE_STACK_UNWIND(fsetxattr, frame, op_ret, op_errno, xdata);
- return 0;
-}
-
-int
-stripe_is_special_key(dict_t *this, char *key, data_t *value, void *data)
-{
- gf_boolean_t *is_special = NULL;
-
- if (data == NULL) {
- goto out;
- }
-
- is_special = data;
-
- if (XATTR_IS_LOCKINFO(key) || XATTR_IS_BD(key))
- *is_special = _gf_true;
-
-out:
- return 0;
-}
-
-int32_t
-stripe_fsetxattr_everyone_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
-{
- int call_count = 0;
- stripe_local_t *local = NULL;
-
- local = frame->local;
-
- LOCK(&frame->lock);
- {
- call_count = --local->wind_count;
-
- if (op_ret < 0) {
- local->op_ret = op_ret;
- local->op_errno = op_errno;
- }
- }
- UNLOCK(&frame->lock);
-
- if (call_count == 0) {
- STRIPE_STACK_UNWIND(fsetxattr, frame, local->op_ret, local->op_errno,
- NULL);
- }
- return 0;
-}
-
-int
-stripe_fsetxattr_to_everyone(call_frame_t *frame, xlator_t *this, fd_t *fd,
- dict_t *dict, int flags, dict_t *xdata)
-{
- xlator_list_t *trav = NULL;
- stripe_private_t *priv = NULL;
- int ret = -1;
- stripe_local_t *local = NULL;
-
- priv = this->private;
-
- local = mem_get0(this->local_pool);
- if (local == NULL) {
- goto out;
- }
-
- frame->local = local;
-
- local->wind_count = priv->child_count;
-
- trav = this->children;
-
- while (trav) {
- STACK_WIND(frame, stripe_fsetxattr_everyone_cbk, trav->xlator,
- trav->xlator->fops->fsetxattr, fd, dict, flags, xdata);
- trav = trav->next;
- }
-
- ret = 0;
-out:
- return ret;
-}
-
-static gf_boolean_t
-stripe_fsetxattr_is_special(dict_t *dict)
-{
- gf_boolean_t is_spl = _gf_false;
-
- if (dict == NULL) {
- goto out;
- }
-
- dict_foreach(dict, stripe_is_special_key, &is_spl);
-
-out:
- return is_spl;
-}
-
-int
-stripe_fsetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict,
- int flags, dict_t *xdata)
-{
- int32_t op_ret = -1, ret = -1, op_errno = EINVAL;
- gf_boolean_t is_spl = _gf_false;
-
- VALIDATE_OR_GOTO(frame, err);
- VALIDATE_OR_GOTO(this, err);
- VALIDATE_OR_GOTO(fd, err);
-
- GF_IF_INTERNAL_XATTR_GOTO("trusted.*stripe*", dict, op_errno, err);
-
- is_spl = stripe_fsetxattr_is_special(dict);
- if (is_spl) {
- ret = stripe_fsetxattr_to_everyone(frame, this, fd, dict, flags, xdata);
- if (ret < 0) {
- op_errno = ENOMEM;
- goto err;
- }
-
- goto out;
- }
-
- STACK_WIND(frame, stripe_fsetxattr_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fsetxattr, fd, dict, flags, xdata);
-out:
- return 0;
-err:
- STRIPE_STACK_UNWIND(fsetxattr, frame, op_ret, op_errno, NULL);
- return 0;
-}
-
-int
-stripe_removexattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
-{
- STRIPE_STACK_UNWIND(removexattr, frame, op_ret, op_errno, xdata);
- return 0;
-}
-
-int
-stripe_removexattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
- const char *name, dict_t *xdata)
-{
- int32_t op_errno = EINVAL;
-
- VALIDATE_OR_GOTO(this, err);
-
- GF_IF_NATIVE_XATTR_GOTO("trusted.*stripe*", name, op_errno, err);
-
- VALIDATE_OR_GOTO(frame, err);
- VALIDATE_OR_GOTO(loc, err);
-
- STACK_WIND(frame, stripe_removexattr_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->removexattr, loc, name, xdata);
- return 0;
-err:
- STRIPE_STACK_UNWIND(removexattr, frame, -1, op_errno, NULL);
- return 0;
-}
-
-int
-stripe_fremovexattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
-{
- STRIPE_STACK_UNWIND(fremovexattr, frame, op_ret, op_errno, xdata);
- return 0;
-}
-
-int
-stripe_fremovexattr(call_frame_t *frame, xlator_t *this, fd_t *fd,
- const char *name, dict_t *xdata)
-{
- int32_t op_ret = -1;
- int32_t op_errno = EINVAL;
-
- VALIDATE_OR_GOTO(frame, err);
- VALIDATE_OR_GOTO(this, err);
- VALIDATE_OR_GOTO(fd, err);
-
- GF_IF_NATIVE_XATTR_GOTO("trusted.*stripe*", name, op_errno, err);
-
- STACK_WIND(frame, stripe_fremovexattr_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fremovexattr, fd, name, xdata);
- return 0;
-err:
- STRIPE_STACK_UNWIND(fremovexattr, frame, op_ret, op_errno, xdata);
- return 0;
-}
-
-int32_t
-stripe_readdirp_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, inode_t *inode,
- struct iatt *stbuf, dict_t *xattr,
- struct iatt *parent)
-{
- stripe_local_t *local = NULL;
- call_frame_t *main_frame = NULL;
- stripe_local_t *main_local = NULL;
- gf_dirent_t *entry = NULL;
- call_frame_t *prev = NULL;
- int done = 0;
-
- local = frame->local;
- prev = cookie;
-
- entry = local->dirent;
-
- main_frame = local->orig_frame;
- main_local = main_frame->local;
- LOCK(&frame->lock);
- {
- local->call_count--;
- if (!local->call_count)
- done = 1;
- if (op_ret == -1) {
- local->op_errno = op_errno;
- local->op_ret = op_ret;
- goto unlock;
- }
-
- if (stripe_ctx_handle(this, prev, local, xattr))
- gf_log(this->name, GF_LOG_ERROR,
- "Error getting fctx info from dict.");
-
- correct_file_size(stbuf, local->fctx, prev);
-
- stripe_iatt_merge(stbuf, &entry->d_stat);
- local->stbuf_blocks += stbuf->ia_blocks;
- }
-unlock:
- UNLOCK(&frame->lock);
-
- if (done) {
- inode_ctx_put(entry->inode, this, (uint64_t)(long)local->fctx);
-
- done = 0;
- LOCK(&main_frame->lock);
- {
- main_local->wind_count--;
- if (!main_local->wind_count)
- done = 1;
- if (local->op_ret == -1) {
- main_local->op_errno = local->op_errno;
- main_local->op_ret = local->op_ret;
- }
- entry->d_stat.ia_blocks = local->stbuf_blocks;
- }
- UNLOCK(&main_frame->lock);
- if (done) {
- main_frame->local = NULL;
- STRIPE_STACK_UNWIND(readdir, main_frame, main_local->op_ret,
- main_local->op_errno, &main_local->entries,
- NULL);
- gf_dirent_free(&main_local->entries);
- stripe_local_wipe(main_local);
- mem_put(main_local);
- }
- frame->local = NULL;
- stripe_local_wipe(local);
- mem_put(local);
- STRIPE_STACK_DESTROY(frame);
- }
-
- return 0;
-}
-
-int32_t
-stripe_readdirp_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, gf_dirent_t *orig_entries,
- dict_t *xdata)
-{
- stripe_local_t *local = NULL;
- call_frame_t *prev = NULL;
- gf_dirent_t *local_entry = NULL;
- gf_dirent_t *tmp_entry = NULL;
- xlator_list_t *trav = NULL;
- loc_t loc = {
- 0,
- };
- int32_t count = 0;
- stripe_private_t *priv = NULL;
- int32_t subvols = 0;
- dict_t *xattrs = NULL;
- call_frame_t *local_frame = NULL;
- stripe_local_t *local_ent = NULL;
-
- if (!this || !frame->local || !cookie) {
- gf_log("stripe", GF_LOG_DEBUG, "possible NULL deref");
- goto out;
- }
- prev = cookie;
- local = frame->local;
- trav = this->children;
- priv = this->private;
-
- subvols = priv->child_count;
-
- LOCK(&frame->lock);
- {
- local->op_errno = op_errno;
- local->op_ret = op_ret;
-
- if (op_ret != -1) {
- list_splice_init(&orig_entries->list, &local->entries.list);
- local->wind_count = op_ret;
- }
- }
- UNLOCK(&frame->lock);
-
- if (op_ret == -1) {
- gf_log(this->name, GF_LOG_WARNING, "%s returned error %s",
- prev->this->name, strerror(op_errno));
- goto out;
- }
-
- xattrs = dict_new();
- if (xattrs)
- (void)stripe_xattr_request_build(this, xattrs, 0, 0, 0, 0);
- count = op_ret;
- list_for_each_entry_safe(local_entry, tmp_entry, (&local->entries.list),
- list)
- {
- if (!local_entry)
- break;
- if (!IA_ISREG(local_entry->d_stat.ia_type) || !local_entry->inode) {
- LOCK(&frame->lock);
- {
- local->wind_count--;
- count = local->wind_count;
- }
- UNLOCK(&frame->lock);
- continue;
- }
-
- local_frame = copy_frame(frame);
-
- if (!local_frame) {
- op_errno = ENOMEM;
- op_ret = -1;
- goto out;
- }
-
- local_ent = mem_get0(this->local_pool);
- if (!local_ent) {
- op_errno = ENOMEM;
- op_ret = -1;
- goto out;
- }
-
- loc.inode = inode_ref(local_entry->inode);
-
- gf_uuid_copy(loc.gfid, local_entry->d_stat.ia_gfid);
-
- local_ent->orig_frame = frame;
-
- local_ent->call_count = subvols;
-
- local_ent->dirent = local_entry;
-
- local_frame->local = local_ent;
-
- trav = this->children;
- while (trav) {
- STACK_WIND(local_frame, stripe_readdirp_lookup_cbk, trav->xlator,
- trav->xlator->fops->lookup, &loc, xattrs);
- trav = trav->next;
- }
- loc_wipe(&loc);
- }
-out:
- if (!count) {
- /* all entries are directories */
- frame->local = NULL;
- STRIPE_STACK_UNWIND(readdir, frame, (local ? local->op_ret : -1),
- (local ? local->op_errno : EINVAL),
- (local ? &local->entries : NULL), NULL);
- gf_dirent_free(&local->entries);
- stripe_local_wipe(local);
- mem_put(local);
- }
- if (xattrs)
- dict_unref(xattrs);
- return 0;
-}
-int32_t
-stripe_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
- off_t off, dict_t *xdata)
-{
- stripe_local_t *local = NULL;
- stripe_private_t *priv = NULL;
- xlator_list_t *trav = NULL;
- int op_errno = -1;
-
- VALIDATE_OR_GOTO(frame, err);
- VALIDATE_OR_GOTO(this, err);
- VALIDATE_OR_GOTO(fd, err);
-
- priv = this->private;
- trav = this->children;
-
- if (priv->first_child_down) {
- op_errno = ENOTCONN;
- goto err;
- }
-
- /* Initialization */
- local = mem_get0(this->local_pool);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
-
- frame->local = local;
-
- local->fd = fd_ref(fd);
-
- local->wind_count = 0;
-
- local->count = 0;
- local->op_ret = -1;
- INIT_LIST_HEAD(&local->entries);
-
- if (!trav)
- goto err;
-
- STACK_WIND(frame, stripe_readdirp_cbk, trav->xlator,
- trav->xlator->fops->readdirp, fd, size, off, xdata);
- return 0;
-err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- STRIPE_STACK_UNWIND(readdir, frame, -1, op_errno, NULL, NULL);
-
- return 0;
-}
-
-int32_t
-mem_acct_init(xlator_t *this)
-{
- int ret = -1;
-
- if (!this)
- goto out;
-
- ret = xlator_mem_acct_init(this, gf_stripe_mt_end + 1);
-
- if (ret != 0) {
- gf_log(this->name, GF_LOG_ERROR,
- "Memory accounting init"
- "failed");
- goto out;
- }
-
-out:
- return ret;
-}
-
-static int
-clear_pattern_list(stripe_private_t *priv)
-{
- struct stripe_options *prev = NULL;
- struct stripe_options *trav = NULL;
- int ret = -1;
-
- GF_VALIDATE_OR_GOTO("stripe", priv, out);
-
- trav = priv->pattern;
- priv->pattern = NULL;
- while (trav) {
- prev = trav;
- trav = trav->next;
- GF_FREE(prev);
- }
-
- ret = 0;
-out:
- return ret;
-}
-
-int
-reconfigure(xlator_t *this, dict_t *options)
-{
- stripe_private_t *priv = NULL;
- data_t *data = NULL;
- int ret = -1;
- volume_option_t *opt = NULL;
-
- GF_ASSERT(this);
- GF_ASSERT(this->private);
-
- priv = this->private;
-
- ret = 0;
- LOCK(&priv->lock);
- {
- ret = clear_pattern_list(priv);
- if (ret)
- goto unlock;
-
- data = dict_get(options, "block-size");
- if (data) {
- ret = set_stripe_block_size(this, priv, data->data);
- if (ret)
- goto unlock;
- } else {
- opt = xlator_volume_option_get(this, "block-size");
- if (!opt) {
- gf_log(this->name, GF_LOG_WARNING,
- "option 'block-size' not found");
- ret = -1;
- goto unlock;
- }
-
- if (gf_string2bytesize_uint64(opt->default_value,
- &priv->block_size)) {
- gf_log(this->name, GF_LOG_ERROR,
- "Unable to set default block-size ");
- ret = -1;
- goto unlock;
- }
- }
-
- GF_OPTION_RECONF("coalesce", priv->coalesce, options, bool, unlock);
- }
-unlock:
- UNLOCK(&priv->lock);
- if (ret)
- goto out;
-
- ret = 0;
-out:
- return ret;
-}
-
-/**
- * init - This function is called when xlator-graph gets initialized.
- * The option given in volfiles are parsed here.
- * @this -
- */
-int32_t
-init(xlator_t *this)
-{
- stripe_private_t *priv = NULL;
- volume_option_t *opt = NULL;
- xlator_list_t *trav = NULL;
- data_t *data = NULL;
- int32_t count = 0;
- int ret = -1;
-
- if (!this)
- goto out;
-
- trav = this->children;
- while (trav) {
- count++;
- trav = trav->next;
- }
-
- if (!count) {
- gf_log(this->name, GF_LOG_ERROR,
- "stripe configured without \"subvolumes\" option. "
- "exiting");
- goto out;
- }
-
- if (!this->parents) {
- gf_log(this->name, GF_LOG_WARNING, "dangling volume. check volfile ");
- }
-
- if (count == 1) {
- gf_log(this->name, GF_LOG_ERROR,
- "stripe configured with only one \"subvolumes\" option."
- " please check the volume. exiting");
- goto out;
- }
-
- priv = GF_CALLOC(1, sizeof(stripe_private_t),
- gf_stripe_mt_stripe_private_t);
-
- if (!priv)
- goto out;
- priv->xl_array = GF_CALLOC(count, sizeof(xlator_t *),
- gf_stripe_mt_xlator_t);
- if (!priv->xl_array)
- goto out;
-
- priv->last_event = GF_CALLOC(count, sizeof(int), gf_stripe_mt_int32_t);
- if (!priv->last_event)
- goto out;
-
- priv->child_count = count;
- LOCK_INIT(&priv->lock);
-
- trav = this->children;
- count = 0;
- while (trav) {
- priv->xl_array[count++] = trav->xlator;
- trav = trav->next;
- }
-
- if (count > 256) {
- gf_log(this->name, GF_LOG_ERROR,
- "maximum number of stripe subvolumes supported "
- "is 256");
- goto out;
- }
-
- ret = 0;
- LOCK(&priv->lock);
- {
- opt = xlator_volume_option_get(this, "block-size");
- if (!opt) {
- gf_log(this->name, GF_LOG_WARNING, "option 'block-size' not found");
- ret = -1;
- goto unlock;
- }
- if (gf_string2bytesize_uint64(opt->default_value, &priv->block_size)) {
- gf_log(this->name, GF_LOG_ERROR,
- "Unable to set default block-size ");
- ret = -1;
- goto unlock;
- }
- /* option stripe-pattern *avi:1GB,*pdf:16K */
- data = dict_get(this->options, "block-size");
- if (data) {
- ret = set_stripe_block_size(this, priv, data->data);
- if (ret)
- goto unlock;
- }
- }
-unlock:
- UNLOCK(&priv->lock);
- if (ret)
- goto out;
-
- GF_OPTION_INIT("use-xattr", priv->xattr_supported, bool, out);
- /* notify related */
- priv->nodes_down = priv->child_count;
-
- GF_OPTION_INIT("coalesce", priv->coalesce, bool, out);
-
- this->local_pool = mem_pool_new(stripe_local_t, 128);
- if (!this->local_pool) {
- ret = -1;
- gf_log(this->name, GF_LOG_ERROR,
- "failed to create local_t's memory pool");
- goto out;
- }
-
- this->private = priv;
-
- ret = 0;
-out:
- if (ret) {
- if (priv) {
- GF_FREE(priv->xl_array);
- GF_FREE(priv);
- }
- }
- return ret;
-}
-
-/**
- * fini - Free all the private variables
- * @this -
- */
-void
-fini(xlator_t *this)
-{
- stripe_private_t *priv = NULL;
- struct stripe_options *prev = NULL;
- struct stripe_options *trav = NULL;
-
- if (!this)
- goto out;
-
- priv = this->private;
- if (priv) {
- this->private = NULL;
- GF_FREE(priv->xl_array);
-
- trav = priv->pattern;
- while (trav) {
- prev = trav;
- trav = trav->next;
- GF_FREE(prev);
- }
- GF_FREE(priv->last_event);
- LOCK_DESTROY(&priv->lock);
- GF_FREE(priv);
- }
-
-out:
- return;
-}
-
-int32_t
-stripe_getxattr_unwind(call_frame_t *frame, int op_ret, int op_errno,
- dict_t *dict, dict_t *xdata)
-
-{
- STRIPE_STACK_UNWIND(getxattr, frame, op_ret, op_errno, dict, xdata);
- return 0;
-}
-
-int
-stripe_internal_getxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, dict_t *xattr,
- dict_t *xdata)
-{
- char size_key[256] = {
- 0,
- };
- char index_key[256] = {
- 0,
- };
- char count_key[256] = {
- 0,
- };
- char coalesce_key[256] = {
- 0,
- };
-
- VALIDATE_OR_GOTO(frame, out);
- VALIDATE_OR_GOTO(frame->local, out);
-
- if (!xattr || (op_ret == -1))
- goto out;
-
- sprintf(size_key, "trusted.%s.stripe-size", this->name);
- sprintf(count_key, "trusted.%s.stripe-count", this->name);
- sprintf(index_key, "trusted.%s.stripe-index", this->name);
- sprintf(coalesce_key, "trusted.%s.stripe-coalesce", this->name);
-
- dict_del(xattr, size_key);
- dict_del(xattr, count_key);
- dict_del(xattr, index_key);
- dict_del(xattr, coalesce_key);
-
-out:
- STRIPE_STACK_UNWIND(getxattr, frame, op_ret, op_errno, xattr, xdata);
-
- return 0;
-}
-
-int
-stripe_getxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, dict_t *xattr, dict_t *xdata)
-{
- int call_cnt = 0;
- stripe_local_t *local = NULL;
-
- VALIDATE_OR_GOTO(frame, out);
- VALIDATE_OR_GOTO(frame->local, out);
-
- local = frame->local;
-
- LOCK(&frame->lock);
- {
- call_cnt = --local->wind_count;
- }
- UNLOCK(&frame->lock);
-
- if (!xattr || (op_ret < 0))
- goto out;
-
- local->op_ret = 0;
-
- if (!local->xattr) {
- local->xattr = dict_ref(xattr);
- } else {
- stripe_aggregate_xattr(local->xattr, xattr);
- }
-
-out:
- if (!call_cnt) {
- STRIPE_STACK_UNWIND(getxattr, frame, (local ? local->op_ret : -1),
- op_errno, (local ? local->xattr : NULL), xdata);
- }
-
- return 0;
-}
-
-int32_t
-stripe_vgetxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *dict,
- dict_t *xdata)
-{
- stripe_local_t *local = NULL;
- int32_t callcnt = 0;
- int32_t ret = -1;
- long cky = 0;
- void *xattr_val = NULL;
- void *xattr_serz = NULL;
- stripe_xattr_sort_t *xattr = NULL;
- dict_t *stripe_xattr = NULL;
-
- if (!frame || !frame->local || !this) {
- gf_log("", GF_LOG_ERROR, "Possible NULL deref");
- return ret;
- }
-
- local = frame->local;
- cky = (long)cookie;
-
- if (local->xsel[0] == '\0') {
- gf_log(this->name, GF_LOG_ERROR, "Empty xattr in cbk");
- return ret;
- }
-
- LOCK(&frame->lock);
- {
- callcnt = --local->wind_count;
-
- if (!dict || (op_ret < 0))
- goto out;
-
- if (!local->xattr_list)
- local->xattr_list = (stripe_xattr_sort_t *)GF_CALLOC(
- local->nallocs, sizeof(stripe_xattr_sort_t),
- gf_stripe_mt_xattr_sort_t);
-
- if (local->xattr_list) {
- xattr = local->xattr_list + (int32_t)cky;
-
- ret = dict_get_ptr_and_len(dict, local->xsel, &xattr_val,
- &xattr->xattr_len);
- if (xattr->xattr_len == 0)
- goto out;
-
- xattr->pos = cky;
- xattr->xattr_value = gf_memdup(xattr_val, xattr->xattr_len);
-
- if (xattr->xattr_value != NULL)
- local->xattr_total_len += xattr->xattr_len + 1;
- }
- }
-out:
- UNLOCK(&frame->lock);
-
- if (!callcnt) {
- if (!local->xattr_total_len)
- goto unwind;
-
- stripe_xattr = dict_new();
- if (!stripe_xattr)
- goto unwind;
-
- /* select filler based on ->xsel */
- if (XATTR_IS_PATHINFO(local->xsel))
- ret = stripe_fill_pathinfo_xattr(this, local, (char **)&xattr_serz);
- else if (XATTR_IS_LOCKINFO(local->xsel)) {
- ret = stripe_fill_lockinfo_xattr(this, local, &xattr_serz);
- } else {
- gf_log(this->name, GF_LOG_WARNING,
- "Unknown xattr in xattr request");
- goto unwind;
- }
-
- if (!ret) {
- ret = dict_set_dynptr(stripe_xattr, local->xsel, xattr_serz,
- local->xattr_total_len);
- if (ret)
- gf_log(this->name, GF_LOG_ERROR, "Can't set %s key in dict",
- local->xsel);
- }
-
- unwind:
- /*
- * Among other things, STRIPE_STACK_UNWIND will free "local"
- * for us. That means we can't dereference it afterward.
- * Fortunately, the actual result is in stripe_xattr now, so we
- * can simply clean up before unwinding.
- */
- ret = stripe_free_xattr_str(local);
- GF_FREE(local->xattr_list);
- local->xattr_list = NULL;
-
- STRIPE_STACK_UNWIND(getxattr, frame, op_ret, op_errno, stripe_xattr,
- NULL);
-
- if (stripe_xattr)
- dict_unref(stripe_xattr);
- }
-
- return ret;
-}
-
-int
-stripe_marker_populate_args(call_frame_t *frame, int type, int *gauge,
- xlator_t **subvols)
-{
- xlator_t *this = frame->this;
- stripe_private_t *priv = this->private;
- stripe_local_t *local = frame->local;
- int count = 0;
-
- count = priv->child_count;
- if (MARKER_XTIME_TYPE == type) {
- if (!IA_FILE_OR_DIR(local->loc.inode->ia_type))
- count = 1;
- }
- memcpy(subvols, priv->xl_array, sizeof(*subvols) * count);
-
- return count;
-}
-
-int32_t
-stripe_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
- const char *name, dict_t *xdata)
-{
- stripe_local_t *local = NULL;
- xlator_list_t *trav = NULL;
- stripe_private_t *priv = NULL;
- int32_t op_errno = EINVAL;
- int i = 0;
- int ret = 0;
-
- VALIDATE_OR_GOTO(frame, err);
- VALIDATE_OR_GOTO(this, err);
- VALIDATE_OR_GOTO(loc, err);
- VALIDATE_OR_GOTO(loc->path, err);
- VALIDATE_OR_GOTO(loc->inode, err);
-
- priv = this->private;
- trav = this->children;
-
- /* Initialization */
- local = mem_get0(this->local_pool);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
- local->op_ret = -1;
- frame->local = local;
- loc_copy(&local->loc, loc);
-
- if (name && strncmp(name, QUOTA_SIZE_KEY, SLEN(QUOTA_SIZE_KEY)) == 0) {
- local->wind_count = priv->child_count;
-
- for (i = 0, trav = this->children; i < priv->child_count;
- i++, trav = trav->next) {
- STACK_WIND(frame, stripe_getxattr_cbk, trav->xlator,
- trav->xlator->fops->getxattr, loc, name, xdata);
- }
-
- return 0;
- }
-
- if (name && (XATTR_IS_PATHINFO(name))) {
- if (IA_ISREG(loc->inode->ia_type)) {
- ret = inode_ctx_get(loc->inode, this, (uint64_t *)&local->fctx);
- if (ret)
- gf_log(this->name, GF_LOG_ERROR,
- "stripe size unavailable from fctx"
- " relying on pathinfo could lead to"
- " wrong results");
- }
-
- local->nallocs = local->wind_count = priv->child_count;
- (void)strncpy(local->xsel, name, strlen(name));
-
- /**
- * for xattrs that need info from all children, fill ->xsel
- * as above and call the filler function in cbk based on
- * it
- */
- for (i = 0, trav = this->children; i < priv->child_count;
- i++, trav = trav->next) {
- STACK_WIND_COOKIE(frame, stripe_vgetxattr_cbk, (void *)(long)i,
- trav->xlator, trav->xlator->fops->getxattr, loc,
- name, xdata);
- }
-
- return 0;
- }
-
- if (cluster_handle_marker_getxattr(frame, loc, name, priv->vol_uuid,
- stripe_getxattr_unwind,
- stripe_marker_populate_args) == 0)
- return 0;
-
- STACK_WIND(frame, stripe_internal_getxattr_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->getxattr, loc, name, xdata);
-
- return 0;
-
-err:
- STRIPE_STACK_UNWIND(getxattr, frame, -1, op_errno, NULL, NULL);
- return 0;
-}
-
-static gf_boolean_t
-stripe_is_special_xattr(const char *name)
-{
- gf_boolean_t is_spl = _gf_false;
-
- if (!name) {
- goto out;
- }
-
- if (!strncmp(name, GF_XATTR_LOCKINFO_KEY, SLEN(GF_XATTR_LOCKINFO_KEY)) ||
- XATTR_IS_PATHINFO(name))
- is_spl = _gf_true;
-out:
- return is_spl;
-}
-
-int32_t
-stripe_fgetxattr_from_everyone(call_frame_t *frame, xlator_t *this, fd_t *fd,
- const char *name, dict_t *xdata)
-{
- stripe_local_t *local = NULL;
- stripe_private_t *priv = NULL;
- int32_t ret = -1, op_errno = 0;
- int i = 0;
- xlator_list_t *trav = NULL;
-
- priv = this->private;
-
- local = mem_get0(this->local_pool);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
-
- local->op_ret = -1;
- frame->local = local;
-
- strncpy(local->xsel, name, strlen(name));
- local->nallocs = local->wind_count = priv->child_count;
-
- for (i = 0, trav = this->children; i < priv->child_count;
- i++, trav = trav->next) {
- STACK_WIND_COOKIE(frame, stripe_vgetxattr_cbk, (void *)(long)i,
- trav->xlator, trav->xlator->fops->fgetxattr, fd, name,
- xdata);
- }
-
- return 0;
-
-err:
- STACK_UNWIND_STRICT(fgetxattr, frame, -1, op_errno, NULL, NULL);
- return ret;
-}
-
-int32_t
-stripe_fgetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd,
- const char *name, dict_t *xdata)
-{
- if (stripe_is_special_xattr(name)) {
- stripe_fgetxattr_from_everyone(frame, this, fd, name, xdata);
- goto out;
- }
-
- STACK_WIND(frame, stripe_internal_getxattr_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fgetxattr, fd, name, xdata);
-
-out:
- return 0;
-}
-
-int32_t
-stripe_priv_dump(xlator_t *this)
-{
- char key[GF_DUMP_MAX_BUF_LEN];
- int i = 0;
- stripe_private_t *priv = NULL;
- int ret = -1;
- struct stripe_options *options = NULL;
-
- GF_VALIDATE_OR_GOTO("stripe", this, out);
-
- priv = this->private;
- if (!priv)
- goto out;
-
- ret = TRY_LOCK(&priv->lock);
- if (ret != 0)
- goto out;
-
- gf_proc_dump_add_section("xlator.cluster.stripe.%s.priv", this->name);
- gf_proc_dump_write("child_count", "%d", priv->child_count);
-
- for (i = 0; i < priv->child_count; i++) {
- sprintf(key, "subvolumes[%d]", i);
- gf_proc_dump_write(key, "%s.%s", priv->xl_array[i]->type,
- priv->xl_array[i]->name);
- }
-
- options = priv->pattern;
- while (options != NULL) {
- gf_proc_dump_write("path_pattern", "%s", priv->pattern->path_pattern);
- gf_proc_dump_write("options_block_size", "%ul", options->block_size);
-
- options = options->next;
- }
-
- gf_proc_dump_write("block_size", "%ul", priv->block_size);
- gf_proc_dump_write("nodes-down", "%d", priv->nodes_down);
- gf_proc_dump_write("first-child_down", "%d", priv->first_child_down);
- gf_proc_dump_write("xattr_supported", "%d", priv->xattr_supported);
-
- UNLOCK(&priv->lock);
-
-out:
- return ret;
-}
-
-struct xlator_fops fops = {
- .stat = stripe_stat,
- .unlink = stripe_unlink,
- .rename = stripe_rename,
- .link = stripe_link,
- .truncate = stripe_truncate,
- .create = stripe_create,
- .open = stripe_open,
- .readv = stripe_readv,
- .writev = stripe_writev,
- .statfs = stripe_statfs,
- .flush = stripe_flush,
- .fsync = stripe_fsync,
- .ftruncate = stripe_ftruncate,
- .fstat = stripe_fstat,
- .mkdir = stripe_mkdir,
- .rmdir = stripe_rmdir,
- .lk = stripe_lk,
- .opendir = stripe_opendir,
- .fsyncdir = stripe_fsyncdir,
- .setattr = stripe_setattr,
- .fsetattr = stripe_fsetattr,
- .lookup = stripe_lookup,
- .mknod = stripe_mknod,
- .setxattr = stripe_setxattr,
- .fsetxattr = stripe_fsetxattr,
- .getxattr = stripe_getxattr,
- .fgetxattr = stripe_fgetxattr,
- .removexattr = stripe_removexattr,
- .fremovexattr = stripe_fremovexattr,
- .readdirp = stripe_readdirp,
- .fallocate = stripe_fallocate,
- .discard = stripe_discard,
- .zerofill = stripe_zerofill,
- .seek = stripe_seek,
-};
-
-struct xlator_cbks cbks = {
- .release = stripe_release,
- .forget = stripe_forget,
-};
-
-struct xlator_dumpops dumpops = {
- .priv = stripe_priv_dump,
-};
-
-struct volume_options options[] = {
- {
- .key = {"block-size"},
- .type = GF_OPTION_TYPE_SIZE_LIST,
- .default_value = "128KB",
- .min = STRIPE_MIN_BLOCK_SIZE,
- .description = "Size of the stripe unit that would be read "
- "from or written to the striped servers.",
- .op_version = {1},
- .tags = {"stripe"},
- .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE,
- },
- {
- .key = {"use-xattr"},
- .type = GF_OPTION_TYPE_BOOL,
- .default_value = "true",
- .description = "handle the stripe without the xattr",
- .tags = {"stripe", "dev-only"},
- .flags = OPT_FLAG_CLIENT_OPT,
- },
- {
- .key = {"coalesce"},
- .type = GF_OPTION_TYPE_BOOL,
- .default_value = "true",
- .description = "Enable/Disable coalesce mode to flatten striped "
- "files as stored on the server (i.e., eliminate holes "
- "caused by the traditional format).",
- .op_version = {1},
- .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE,
- .tags = {"stripe"},
- },
- {.key = {NULL}},
-};
diff --git a/xlators/cluster/stripe/src/stripe.h b/xlators/cluster/stripe/src/stripe.h
deleted file mode 100644
index 103c96491ff..00000000000
--- a/xlators/cluster/stripe/src/stripe.h
+++ /dev/null
@@ -1,291 +0,0 @@
-/*
- Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-#ifndef _STRIPE_H_
-#define _STRIPE_H_
-
-#include "xlator.h"
-#include "logging.h"
-#include "defaults.h"
-#include "common-utils.h"
-#include "compat.h"
-#include "compat-errno.h"
-#include "stripe-mem-types.h"
-#include "libxlator.h"
-#include <fnmatch.h>
-#include <signal.h>
-
-#define STRIPE_PATHINFO_HEADER "STRIPE:"
-#define STRIPE_MIN_BLOCK_SIZE (16 * GF_UNIT_KB)
-
-#define STRIPE_STACK_UNWIND(fop, frame, params...) \
- do { \
- stripe_local_t *__local = NULL; \
- if (frame) { \
- __local = frame->local; \
- frame->local = NULL; \
- } \
- STACK_UNWIND_STRICT(fop, frame, params); \
- if (__local) { \
- stripe_local_wipe(__local); \
- mem_put(__local); \
- } \
- } while (0)
-
-#define STRIPE_STACK_DESTROY(frame) \
- do { \
- stripe_local_t *__local = NULL; \
- __local = frame->local; \
- frame->local = NULL; \
- STACK_DESTROY(frame->root); \
- if (__local) { \
- stripe_local_wipe(__local); \
- mem_put(__local); \
- } \
- } while (0)
-
-#define STRIPE_VALIDATE_FCTX(fctx, label) \
- do { \
- int idx = 0; \
- if (!fctx) { \
- op_errno = EINVAL; \
- goto label; \
- } \
- for (idx = 0; idx < fctx->stripe_count; idx++) { \
- if (!fctx->xl_array[idx]) { \
- gf_log(this->name, GF_LOG_ERROR, "fctx->xl_array[%d] is NULL", \
- idx); \
- op_errno = ESTALE; \
- goto label; \
- } \
- } \
- } while (0)
-
-typedef struct stripe_xattr_sort {
- int pos;
- int xattr_len;
- char *xattr_value;
-} stripe_xattr_sort_t;
-
-/**
- * struct stripe_options : This keeps the pattern and the block-size
- * information, which is used for striping on a file.
- */
-struct stripe_options {
- struct stripe_options *next;
- char path_pattern[256];
- uint64_t block_size;
-};
-
-/**
- * Private structure for stripe translator
- */
-struct stripe_private {
- struct stripe_options *pattern;
- xlator_t **xl_array;
- uint64_t block_size;
- gf_lock_t lock;
- uint8_t nodes_down;
- int8_t first_child_down;
- int *last_event;
- int8_t child_count;
- gf_boolean_t xattr_supported; /* default yes */
- gf_boolean_t coalesce;
- char vol_uuid[UUID_SIZE + 1];
-};
-
-/**
- * Used to keep info about the replies received from readv/writev calls
- */
-struct stripe_replies {
- struct iovec *vector;
- int32_t count; // count of vector
- int32_t op_ret; // op_ret of readv
- int32_t op_errno;
- int32_t requested_size;
- struct iatt stbuf; /* 'stbuf' is also a part of reply */
-};
-
-typedef struct _stripe_fd_ctx {
- off_t stripe_size;
- int stripe_count;
- int stripe_coalesce;
- int static_array;
- xlator_t **xl_array;
-} stripe_fd_ctx_t;
-
-/**
- * Local structure to be passed with all the frames in case of STACK_WIND
- */
-struct stripe_local; /* this itself is used inside the structure; */
-
-struct stripe_local {
- struct stripe_local *next;
- call_frame_t *orig_frame;
-
- stripe_fd_ctx_t *fctx;
-
- /* Used by _cbk functions */
- struct iatt stbuf;
- struct iatt pre_buf;
- struct iatt post_buf;
- struct iatt preparent;
- struct iatt postparent;
-
- off_t stbuf_size;
- off_t prebuf_size;
- off_t postbuf_size;
- off_t preparent_size;
- off_t postparent_size;
-
- blkcnt_t stbuf_blocks;
- blkcnt_t prebuf_blocks;
- blkcnt_t postbuf_blocks;
- blkcnt_t preparent_blocks;
- blkcnt_t postparent_blocks;
-
- struct stripe_replies *replies;
- struct statvfs statvfs_buf;
- dir_entry_t *entry;
-
- int8_t revalidate;
- int8_t failed;
- int8_t unwind;
-
- size_t readv_size;
- int32_t entry_count;
- int32_t node_index;
- int32_t call_count;
- int32_t wind_count; /* used instead of child_cound
- in case of read and write */
- int32_t op_ret;
- int32_t op_errno;
- int32_t count;
- int32_t flags;
- char *name;
- inode_t *inode;
-
- loc_t loc;
- loc_t loc2;
-
- mode_t mode;
- dev_t rdev;
- /* For File I/O fops */
- dict_t *xdata;
-
- stripe_xattr_sort_t *xattr_list;
- int32_t xattr_total_len;
- int32_t nallocs;
- char xsel[256];
-
- /* General usage */
- off_t offset;
- off_t stripe_size;
-
- int xattr_self_heal_needed;
- int entry_self_heal_needed;
-
- int8_t *list;
- struct gf_flock lock;
- fd_t *fd;
- void *value;
- struct iobref *iobref;
- gf_dirent_t entries;
- gf_dirent_t *dirent;
- dict_t *xattr;
- uuid_t ia_gfid;
-
- int xflag;
- mode_t umask;
-};
-
-typedef struct stripe_local stripe_local_t;
-typedef struct stripe_private stripe_private_t;
-
-/*
- * Determine the stripe index of a particular frame based on the translator.
- */
-static inline int32_t
-stripe_get_frame_index(stripe_fd_ctx_t *fctx, call_frame_t *prev)
-{
- int32_t i, idx = -1;
-
- for (i = 0; i < fctx->stripe_count; i++) {
- if (fctx->xl_array[i] == prev->this) {
- idx = i;
- break;
- }
- }
-
- return idx;
-}
-
-static inline void
-stripe_copy_xl_array(xlator_t **dst, xlator_t **src, int count)
-{
- int i;
-
- for (i = 0; i < count; i++)
- dst[i] = src[i];
-}
-
-void
-stripe_local_wipe(stripe_local_t *local);
-int32_t
-stripe_ctx_handle(xlator_t *this, call_frame_t *prev, stripe_local_t *local,
- dict_t *dict);
-void
-stripe_aggregate_xattr(dict_t *dst, dict_t *src);
-int32_t
-stripe_xattr_request_build(xlator_t *this, dict_t *dict, uint64_t stripe_size,
- uint32_t stripe_count, uint32_t stripe_index,
- uint32_t stripe_coalesce);
-int32_t
-stripe_get_matching_bs(const char *path, stripe_private_t *priv);
-int
-set_stripe_block_size(xlator_t *this, stripe_private_t *priv, char *data);
-int32_t
-stripe_iatt_merge(struct iatt *from, struct iatt *to);
-int32_t
-stripe_fill_pathinfo_xattr(xlator_t *this, stripe_local_t *local,
- char **xattr_serz);
-int32_t
-stripe_free_xattr_str(stripe_local_t *local);
-int32_t
-stripe_xattr_aggregate(char *buffer, stripe_local_t *local, int32_t *total);
-off_t
-coalesced_offset(off_t offset, uint64_t stripe_size, int stripe_count);
-off_t
-uncoalesced_size(off_t size, uint64_t stripe_size, int stripe_count,
- int stripe_index);
-int32_t
-stripe_fill_lockinfo_xattr(xlator_t *this, stripe_local_t *local,
- void **xattr_serz);
-
-/*
- * Adjust the size attribute for files if coalesce is enabled.
- */
-static inline void
-correct_file_size(struct iatt *buf, stripe_fd_ctx_t *fctx, call_frame_t *prev)
-{
- int index;
-
- if (!IA_ISREG(buf->ia_type))
- return;
-
- if (!fctx || !fctx->stripe_coalesce)
- return;
-
- index = stripe_get_frame_index(fctx, prev);
- buf->ia_size = uncoalesced_size(buf->ia_size, fctx->stripe_size,
- fctx->stripe_count, index);
-}
-
-#endif /* _STRIPE_H_ */
diff --git a/xlators/debug/delay-gen/src/delay-gen-mem-types.h b/xlators/debug/delay-gen/src/delay-gen-mem-types.h
index 63a15a70da3..c89a9217193 100644
--- a/xlators/debug/delay-gen/src/delay-gen-mem-types.h
+++ b/xlators/debug/delay-gen/src/delay-gen-mem-types.h
@@ -11,7 +11,7 @@
#ifndef __DELAY_GEN_MEM_TYPES_H__
#define __DELAY_GEN_MEM_TYPES_H__
-#include "mem-types.h"
+#include <glusterfs/mem-types.h>
enum gf_delay_gen_mem_types_ {
gf_delay_gen_mt_dg_t = gf_common_mt_end + 1,
diff --git a/xlators/debug/delay-gen/src/delay-gen-messages.h b/xlators/debug/delay-gen/src/delay-gen-messages.h
index a9046ca14bf..bc98cec2885 100644
--- a/xlators/debug/delay-gen/src/delay-gen-messages.h
+++ b/xlators/debug/delay-gen/src/delay-gen-messages.h
@@ -11,7 +11,7 @@
#ifndef __DELAY_GEN_MESSAGES_H__
#define __DELAY_GEN_MESSAGES_H__
-#include "glfs-message-id.h"
+#include <glusterfs/glfs-message-id.h>
/* To add new message IDs, append new identifiers at the end of the list.
*
diff --git a/xlators/debug/delay-gen/src/delay-gen.c b/xlators/debug/delay-gen/src/delay-gen.c
index a2d02527f23..4698f1fd785 100644
--- a/xlators/debug/delay-gen/src/delay-gen.c
+++ b/xlators/debug/delay-gen/src/delay-gen.c
@@ -27,7 +27,7 @@ delay_gen(xlator_t *this, int fop)
return 0;
if ((rand() % DELAY_GRANULARITY) < dg->delay_ppm)
- usleep(dg->delay_duration);
+ gf_nanosleep(dg->delay_duration * GF_US_IN_NS);
return 0;
}
@@ -679,4 +679,19 @@ struct volume_options options[] = {
.default_value = "",
},
- {.key = {NULL}}};
+ {.key = {NULL}},
+};
+
+xlator_api_t xlator_api = {
+ .init = init,
+ .fini = fini,
+ .notify = notify,
+ .reconfigure = reconfigure,
+ .mem_acct_init = mem_acct_init,
+ .op_version = {GD_OP_VERSION_3_12_0},
+ .fops = &fops,
+ .cbks = &cbks,
+ .options = options,
+ .identifier = "delay-gen",
+ .category = GF_TECH_PREVIEW,
+};
diff --git a/xlators/debug/delay-gen/src/delay-gen.h b/xlators/debug/delay-gen/src/delay-gen.h
index 5e4d179f0b4..afa95e5eb2d 100644
--- a/xlators/debug/delay-gen/src/delay-gen.h
+++ b/xlators/debug/delay-gen/src/delay-gen.h
@@ -13,9 +13,9 @@
#include "delay-gen-mem-types.h"
#include "delay-gen-messages.h"
-#include "glusterfs.h"
-#include "xlator.h"
-#include "defaults.h"
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/xlator.h>
+#include <glusterfs/defaults.h>
typedef struct {
int enable[GF_FOP_MAXVALUE];
diff --git a/xlators/debug/error-gen/src/error-gen-mem-types.h b/xlators/debug/error-gen/src/error-gen-mem-types.h
index 2facd6b27cb..b9b713af8fc 100644
--- a/xlators/debug/error-gen/src/error-gen-mem-types.h
+++ b/xlators/debug/error-gen/src/error-gen-mem-types.h
@@ -11,7 +11,7 @@
#ifndef __ERROR_GEN_MEM_TYPES_H__
#define __ERROR_GEN_MEM_TYPES_H__
-#include "mem-types.h"
+#include <glusterfs/mem-types.h>
enum gf_error_gen_mem_types_ {
gf_error_gen_mt_eg_t = gf_common_mt_end + 1,
diff --git a/xlators/debug/error-gen/src/error-gen.c b/xlators/debug/error-gen/src/error-gen.c
index c6595b4c0e4..d45655ef4c3 100644
--- a/xlators/debug/error-gen/src/error-gen.c
+++ b/xlators/debug/error-gen/src/error-gen.c
@@ -7,10 +7,10 @@
later), or the GNU General Public License, version 2 (GPLv2), in all
cases as published by the Free Software Foundation.
*/
-#include "xlator.h"
+#include <glusterfs/xlator.h>
#include "error-gen.h"
-#include "statedump.h"
-#include "defaults.h"
+#include <glusterfs/statedump.h>
+#include <glusterfs/defaults.h>
/*
* The user can specify an error probability as a float percentage, but we
@@ -31,9 +31,9 @@
sys_error_t error_no_list[] = {
[GF_FOP_LOOKUP] = {.error_no_count = 4,
.error_no = {ENOENT, ENOTDIR, ENAMETOOLONG, EAGAIN}},
- [GF_FOP_STAT] = {.error_no_count = 7,
- .error_no = {EACCES, EBADF, EFAULT, ENAMETOOLONG, ENOENT,
- ENOMEM, ENOTDIR}},
+ [GF_FOP_STAT] = {.error_no_count = 6,
+ .error_no = {EACCES, EFAULT, ENAMETOOLONG, ENOENT, ENOMEM,
+ ENOTDIR}},
[GF_FOP_READLINK] = {.error_no_count = 8,
.error_no = {EACCES, EFAULT, EINVAL, EIO, ENAMETOOLONG,
ENOENT, ENOMEM, ENOTDIR}},
@@ -79,21 +79,20 @@ sys_error_t error_no_list[] = {
[GF_FOP_WRITE] = {.error_no_count = 7,
.error_no = {EINVAL, EBADF, EFAULT, EISDIR, ENAMETOOLONG,
ENOSPC, GF_ERROR_SHORT_WRITE}},
- [GF_FOP_STATFS] = {.error_no_count = 10,
- .error_no = {EACCES, EBADF, EFAULT, EINTR, EIO,
- ENAMETOOLONG, ENOENT, ENOMEM, ENOSYS,
- ENOTDIR}},
+ [GF_FOP_STATFS] = {.error_no_count = 9,
+ .error_no = {EACCES, EFAULT, EINTR, EIO, ENAMETOOLONG,
+ ENOENT, ENOMEM, ENOSYS, ENOTDIR}},
[GF_FOP_FLUSH] = {.error_no_count = 5,
.error_no = {EACCES, EFAULT, ENAMETOOLONG, ENOSYS,
ENOENT}},
[GF_FOP_FSYNC] = {.error_no_count = 4,
.error_no = {EBADF, EIO, EROFS, EINVAL}},
- [GF_FOP_SETXATTR] = {.error_no_count = 4,
- .error_no = {EACCES, EBADF, EINTR, ENAMETOOLONG}},
- [GF_FOP_GETXATTR] = {.error_no_count = 4,
- .error_no = {EACCES, EBADF, ENAMETOOLONG, EINTR}},
- [GF_FOP_REMOVEXATTR] = {.error_no_count = 4,
- .error_no = {EACCES, EBADF, ENAMETOOLONG, EINTR}},
+ [GF_FOP_SETXATTR] = {.error_no_count = 3,
+ .error_no = {EACCES, EINTR, ENAMETOOLONG}},
+ [GF_FOP_GETXATTR] = {.error_no_count = 3,
+ .error_no = {EACCES, ENAMETOOLONG, EINTR}},
+ [GF_FOP_REMOVEXATTR] = {.error_no_count = 3,
+ .error_no = {EACCES, ENAMETOOLONG, EINTR}},
[GF_FOP_FSETXATTR] = {.error_no_count = 4,
.error_no = {EACCES, EBADF, EINTR, ENAMETOOLONG}},
[GF_FOP_FGETXATTR] = {.error_no_count = 4,
@@ -125,26 +124,25 @@ sys_error_t error_no_list[] = {
ENOENT}},
[GF_FOP_FXATTROP] = {.error_no_count = 4,
.error_no = {EBADF, EIO, EROFS, EINVAL}},
- [GF_FOP_INODELK] = {.error_no_count = 4,
- .error_no = {EACCES, EBADF, EINTR, ENAMETOOLONG}},
+ [GF_FOP_INODELK] = {.error_no_count = 3,
+ .error_no = {EACCES, EINTR, ENAMETOOLONG}},
[GF_FOP_FINODELK] = {.error_no_count = 4,
.error_no = {EACCES, EBADF, EINTR, ENAMETOOLONG}},
- [GF_FOP_ENTRYLK] = {.error_no_count = 4,
- .error_no = {EACCES, EBADF, ENAMETOOLONG, EINTR}},
+ [GF_FOP_ENTRYLK] = {.error_no_count = 3,
+ .error_no = {EACCES, ENAMETOOLONG, EINTR}},
[GF_FOP_FENTRYLK] = {.error_no_count = 10,
.error_no = {EACCES, EEXIST, EFAULT, EISDIR, EMFILE,
ENAMETOOLONG, ENFILE, ENODEV, ENOENT,
ENOMEM}},
- [GF_FOP_SETATTR] = {.error_no_count = 11,
+ [GF_FOP_SETATTR] = {.error_no_count = 10,
.error_no = {EACCES, EFAULT, EIO, ENAMETOOLONG, ENOENT,
- ENOMEM, ENOTDIR, EPERM, EROFS, EBADF,
- EIO}},
+ ENOMEM, ENOTDIR, EPERM, EROFS, EIO}},
[GF_FOP_FSETATTR] = {.error_no_count = 11,
.error_no = {EACCES, EFAULT, EIO, ENAMETOOLONG, ENOENT,
ENOMEM, ENOTDIR, EPERM, EROFS, EBADF,
EIO}},
- [GF_FOP_GETSPEC] = {.error_no_count = 4,
- .error_no = {EACCES, EBADF, ENAMETOOLONG, EINTR}}};
+ [GF_FOP_GETSPEC] = {.error_no_count = 3,
+ .error_no = {EACCES, ENAMETOOLONG, EINTR}}};
int
generate_rand_no(int op_no)
@@ -761,6 +759,7 @@ error_gen_writev(call_frame_t *frame, xlator_t *this, fd_t *fd,
*/
shortvec = iov_dup(vector, 1);
shortvec->iov_len /= 2;
+ count = 1;
goto wind;
} else if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror(op_errno));
@@ -1396,7 +1395,7 @@ error_gen_priv_dump(xlator_t *this)
gf_proc_dump_write("op_count", "%d", conf->op_count);
gf_proc_dump_write("failure_iter_no", "%d", conf->failure_iter_no);
- gf_proc_dump_write("error_no", "%s", conf->error_no);
+ gf_proc_dump_write("error_no", "%d", conf->error_no_int);
gf_proc_dump_write("random_failure", "%d", conf->random_failure);
UNLOCK(&conf->lock);
@@ -1430,6 +1429,7 @@ reconfigure(xlator_t *this, dict_t *options)
eg_t *pvt = NULL;
int32_t ret = 0;
char *error_enable_fops = NULL;
+ char *error_no = NULL;
double failure_percent_dbl = 0.0;
if (!this || !this->private)
@@ -1439,10 +1439,10 @@ reconfigure(xlator_t *this, dict_t *options)
ret = -1;
- GF_OPTION_RECONF("error-no", pvt->error_no, options, str, out);
+ GF_OPTION_RECONF("error-no", error_no, options, str, out);
- if (pvt->error_no)
- pvt->error_no_int = conv_errno_to_int(&pvt->error_no);
+ if (error_no)
+ pvt->error_no_int = conv_errno_to_int(&error_no);
GF_OPTION_RECONF("failure", failure_percent_dbl, options, percent, out);
@@ -1466,6 +1466,7 @@ init(xlator_t *this)
eg_t *pvt = NULL;
int32_t ret = 0;
char *error_enable_fops = NULL;
+ char *error_no = NULL;
double failure_percent_dbl = 0.0;
if (!this->children || this->children->next) {
@@ -1490,10 +1491,10 @@ init(xlator_t *this)
ret = -1;
- GF_OPTION_INIT("error-no", pvt->error_no, str, out);
+ GF_OPTION_INIT("error-no", error_no, str, out);
- if (pvt->error_no)
- pvt->error_no_int = conv_errno_to_int(&pvt->error_no);
+ if (error_no)
+ pvt->error_no_int = conv_errno_to_int(&error_no);
GF_OPTION_INIT("failure", failure_percent_dbl, percent, out);
@@ -1506,8 +1507,8 @@ init(xlator_t *this)
this->private = pvt;
- /* Give some seed value here */
- srand(time(NULL));
+ /* Give some seed value here. */
+ srand(gf_time());
ret = 0;
out:
@@ -1644,4 +1645,19 @@ struct volume_options options[] = {
.flags = OPT_FLAG_SETTABLE,
},
- {.key = {NULL}}};
+ {.key = {NULL}},
+};
+
+xlator_api_t xlator_api = {
+ .init = init,
+ .fini = fini,
+ .reconfigure = reconfigure,
+ .mem_acct_init = mem_acct_init,
+ .op_version = {1},
+ .dumpops = &dumpops,
+ .fops = &fops,
+ .cbks = &cbks,
+ .options = options,
+ .identifier = "error-gen",
+ .category = GF_TECH_PREVIEW,
+};
diff --git a/xlators/debug/error-gen/src/error-gen.h b/xlators/debug/error-gen/src/error-gen.h
index ffa09252d0f..2478cd5b21c 100644
--- a/xlators/debug/error-gen/src/error-gen.h
+++ b/xlators/debug/error-gen/src/error-gen.h
@@ -36,7 +36,6 @@ typedef struct {
* It's just not worth blowing up the diff by changing it.
*/
int failure_iter_no;
- char *error_no;
int error_no_int;
gf_boolean_t random_failure;
gf_lock_t lock;
diff --git a/xlators/debug/io-stats/src/io-stats-mem-types.h b/xlators/debug/io-stats/src/io-stats-mem-types.h
index bc25fd2ca4e..51d38d8b97c 100644
--- a/xlators/debug/io-stats/src/io-stats-mem-types.h
+++ b/xlators/debug/io-stats/src/io-stats-mem-types.h
@@ -11,7 +11,7 @@
#ifndef __IO_STATS_MEM_TYPES_H__
#define __IO_STATS_MEM_TYPES_H__
-#include "mem-types.h"
+#include <glusterfs/mem-types.h>
extern const char *__progname;
diff --git a/xlators/debug/io-stats/src/io-stats.c b/xlators/debug/io-stats/src/io-stats.c
index 06a16c8f3e6..aa00c446e5a 100644
--- a/xlators/debug/io-stats/src/io-stats.c
+++ b/xlators/debug/io-stats/src/io-stats.c
@@ -7,8 +7,8 @@
later), or the GNU General Public License, version 2 (GPLv2), in all
cases as published by the Free Software Foundation.
*/
-#include "xlator.h"
-#include "syscall.h"
+#include <glusterfs/xlator.h>
+#include <glusterfs/syscall.h>
/**
* xlators/debug/io_stats :
@@ -28,18 +28,18 @@
#include <fnmatch.h>
#include <errno.h>
-#include "glusterfs.h"
-#include "xlator.h"
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/xlator.h>
#include "io-stats-mem-types.h"
#include <stdarg.h>
-#include "defaults.h"
-#include "logging.h"
-#include "cli1-xdr.h"
-#include "statedump.h"
-#include "syncop.h"
+#include <glusterfs/defaults.h>
+#include <glusterfs/logging.h>
+#include <glusterfs/statedump.h>
+#include <glusterfs/syncop.h>
#include <pwd.h>
#include <grp.h>
-#include "upcall-utils.h"
+#include <glusterfs/upcall-utils.h>
+#include <glusterfs/async.h>
#define MAX_LIST_MEMBERS 100
#define DEFAULT_PWD_BUF_SZ 16384
@@ -66,6 +66,17 @@ typedef enum {
IOS_STATS_THRU_MAX,
} ios_stats_thru_t;
+/* This is same as gf1_cli_info_op */
+/* had to be defined here again, so we have modularity between
+ xdr, xlator, and library functions */
+typedef enum ios_info_op {
+ GF_IOS_INFO_NONE = 0,
+ GF_IOS_INFO_ALL = 1,
+ GF_IOS_INFO_INCREMENTAL = 2,
+ GF_IOS_INFO_CUMULATIVE = 3,
+ GF_IOS_INFO_CLEAR = 4,
+} ios_info_op_t;
+
struct ios_stat_lat {
struct timeval time;
double throughput;
@@ -124,7 +135,7 @@ struct ios_global_stats {
gf_atomic_t block_count_read[IOS_BLOCK_COUNT_SIZE];
gf_atomic_t fop_hits[GF_FOP_MAXVALUE];
gf_atomic_t upcall_hits[GF_UPCALL_FLAGS_MAXVALUE];
- struct timeval started_at;
+ time_t started_at;
struct ios_lat latency[GF_FOP_MAXVALUE];
uint64_t nr_opens;
uint64_t max_nr_opens;
@@ -173,7 +184,6 @@ struct ios_conf {
*/
char *unique_id;
ios_dump_type_t dump_format;
- char *dump_format_str;
};
struct ios_fd {
@@ -282,9 +292,7 @@ is_fop_latency_started(call_frame_t *frame)
begin = &frame->begin; \
end = &frame->end; \
\
- elapsed = ((end->tv_sec - begin->tv_sec) * 1e9 + \
- (end->tv_nsec - begin->tv_nsec)) / \
- 1000; \
+ elapsed = gf_tsdiff(begin, end) / 1000.0; \
throughput = op_ret / elapsed; \
\
conf = this->private; \
@@ -668,10 +676,7 @@ ios_dump_throughput_stats(struct ios_stat_head *list_head, xlator_t *this,
FILE *logfp, ios_stats_thru_t type)
{
struct ios_stat_list *entry = NULL;
- struct timeval time = {
- 0,
- };
- char timestr[256] = {
+ char timestr[GF_TIMESTR_SIZE] = {
0,
};
@@ -679,12 +684,9 @@ ios_dump_throughput_stats(struct ios_stat_head *list_head, xlator_t *this,
{
list_for_each_entry(entry, &list_head->iosstats->list, list)
{
- gf_time_fmt(timestr, sizeof timestr,
- entry->iosstat->thru_counters[type].time.tv_sec,
- gf_timefmt_FT);
- snprintf(timestr + strlen(timestr),
- sizeof timestr - strlen(timestr), ".%" GF_PRI_SUSECONDS,
- time.tv_usec);
+ gf_time_fmt_tv(timestr, sizeof timestr,
+ &entry->iosstat->thru_counters[type].time,
+ gf_timefmt_FT);
ios_log(this, logfp, "%s \t %-10.2f \t %s", timestr, entry->value,
entry->iosstat->filename);
@@ -763,9 +765,8 @@ err:
int
io_stats_dump_global_to_json_logfp(xlator_t *this,
- struct ios_global_stats *stats,
- struct timeval *now, int interval,
- FILE *logfp)
+ struct ios_global_stats *stats, time_t now,
+ int interval, FILE *logfp)
{
int i = 0;
int j = 0;
@@ -791,10 +792,7 @@ io_stats_dump_global_to_json_logfp(xlator_t *this,
};
dict_t *xattr = NULL;
- interval_sec = ((now->tv_sec * 1000000.0 + now->tv_usec) -
- (stats->started_at.tv_sec * 1000000.0 +
- stats->started_at.tv_usec)) /
- 1000000.0;
+ interval_sec = (double)(now - stats->started_at);
conf = this->private;
@@ -824,19 +822,18 @@ io_stats_dump_global_to_json_logfp(xlator_t *this,
}
if (interval == -1) {
- ios_log(this, logfp, "\"%s.%s.read_%d%s\": \"%" GF_PRI_ATOMIC "\",",
+ ios_log(this, logfp, "\"%s.%s.read_%d%s\": %" GF_PRI_ATOMIC ",",
key_prefix, str_prefix, rw_size, rw_unit,
GF_ATOMIC_GET(stats->block_count_read[i]));
- ios_log(this, logfp,
- "\"%s.%s.write_%d%s\": \"%" GF_PRI_ATOMIC "\",", key_prefix,
- str_prefix, rw_size, rw_unit,
+ ios_log(this, logfp, "\"%s.%s.write_%d%s\": %" GF_PRI_ATOMIC ",",
+ key_prefix, str_prefix, rw_size, rw_unit,
GF_ATOMIC_GET(stats->block_count_write[i]));
} else {
- ios_log(this, logfp, "\"%s.%s.read_%d%s_per_sec\": \"%0.2lf\",",
+ ios_log(this, logfp, "\"%s.%s.read_%d%s_per_sec\": %0.2lf,",
key_prefix, str_prefix, rw_size, rw_unit,
(double)(GF_ATOMIC_GET(stats->block_count_read[i]) /
interval_sec));
- ios_log(this, logfp, "\"%s.%s.write_%d%s_per_sec\": \"%0.2lf\",",
+ ios_log(this, logfp, "\"%s.%s.write_%d%s_per_sec\": %0.2lf,",
key_prefix, str_prefix, rw_size, rw_unit,
(double)(GF_ATOMIC_GET(stats->block_count_write[i]) /
interval_sec));
@@ -844,9 +841,9 @@ io_stats_dump_global_to_json_logfp(xlator_t *this,
}
if (interval == -1) {
- ios_log(this, logfp, "\"%s.%s.fds.open_count\": \"%" PRId64 "\",",
+ ios_log(this, logfp, "\"%s.%s.fds.open_count\": %" PRId64 ",",
key_prefix, str_prefix, conf->cumulative.nr_opens);
- ios_log(this, logfp, "\"%s.%s.fds.max_open_count\": \"%" PRId64 "\",",
+ ios_log(this, logfp, "\"%s.%s.fds.max_open_count\": %" PRId64 ",",
key_prefix, str_prefix, conf->cumulative.max_nr_opens);
}
@@ -868,20 +865,19 @@ io_stats_dump_global_to_json_logfp(xlator_t *this,
}
}
if (interval == -1) {
- ios_log(this, logfp,
- "\"%s.%s.fop.%s.count\": \"%" GF_PRI_ATOMIC "\",",
+ ios_log(this, logfp, "\"%s.%s.fop.%s.count\": %" GF_PRI_ATOMIC ",",
key_prefix, str_prefix, lc_fop_name, fop_hits);
} else {
- ios_log(this, logfp, "\"%s.%s.fop.%s.per_sec\": \"%0.2lf\",",
+ ios_log(this, logfp, "\"%s.%s.fop.%s.per_sec\": %0.2lf,",
key_prefix, str_prefix, lc_fop_name,
(double)(fop_hits / interval_sec));
}
- ios_log(this, logfp, "\"%s.%s.fop.%s.latency_ave_usec\": \"%0.2lf\",",
+ ios_log(this, logfp, "\"%s.%s.fop.%s.latency_ave_usec\": %0.2lf,",
key_prefix, str_prefix, lc_fop_name, fop_lat_ave);
- ios_log(this, logfp, "\"%s.%s.fop.%s.latency_min_usec\": \"%0.2lf\",",
+ ios_log(this, logfp, "\"%s.%s.fop.%s.latency_min_usec\": %0.2lf,",
key_prefix, str_prefix, lc_fop_name, fop_lat_min);
- ios_log(this, logfp, "\"%s.%s.fop.%s.latency_max_usec\": \"%0.2lf\",",
+ ios_log(this, logfp, "\"%s.%s.fop.%s.latency_max_usec\": %0.2lf,",
key_prefix, str_prefix, lc_fop_name, fop_lat_max);
fop_ave_usec_sum += fop_lat_ave;
@@ -896,18 +892,17 @@ io_stats_dump_global_to_json_logfp(xlator_t *this,
*/
ios_log(this, logfp,
"\"%s.%s.fop.weighted_latency_ave_usec_nozerofill\": "
- "\"%0.4lf\",",
+ "%0.4lf,",
key_prefix, str_prefix, weighted_fop_ave_usec);
}
- ios_log(this, logfp, "\"%s.%s.fop.weighted_latency_ave_usec\": \"%0.4lf\",",
+ ios_log(this, logfp, "\"%s.%s.fop.weighted_latency_ave_usec\": %0.4lf,",
key_prefix, str_prefix, weighted_fop_ave_usec);
- ios_log(this, logfp, "\"%s.%s.fop.weighted_fop_count\": \"%ld\",",
- key_prefix, str_prefix, total_fop_hits);
+ ios_log(this, logfp, "\"%s.%s.fop.weighted_fop_count\": %ld,", key_prefix,
+ str_prefix, total_fop_hits);
fop_ave_usec = fop_ave_usec_sum / GF_FOP_MAXVALUE;
- ios_log(this, logfp,
- "\"%s.%s.fop.unweighted_latency_ave_usec\":\"%0.4lf\",", key_prefix,
- str_prefix, fop_ave_usec);
+ ios_log(this, logfp, "\"%s.%s.fop.unweighted_latency_ave_usec\":%0.4lf,",
+ key_prefix, str_prefix, fop_ave_usec);
for (i = 0; i < GF_UPCALL_FLAGS_MAXVALUE; i++) {
lc_fop_name = strdupa(gf_upcall_list[i]);
@@ -916,11 +911,10 @@ io_stats_dump_global_to_json_logfp(xlator_t *this,
}
fop_hits = GF_ATOMIC_GET(stats->upcall_hits[i]);
if (interval == -1) {
- ios_log(this, logfp,
- "\"%s.%s.fop.%s.count\": \"%" GF_PRI_ATOMIC "\",",
+ ios_log(this, logfp, "\"%s.%s.fop.%s.count\": %" GF_PRI_ATOMIC ",",
key_prefix, str_prefix, lc_fop_name, fop_hits);
} else {
- ios_log(this, logfp, "\"%s.%s.fop.%s.per_sec\": \"%0.2lf\",",
+ ios_log(this, logfp, "\"%s.%s.fop.%s.per_sec\": %0.2lf,",
key_prefix, str_prefix, lc_fop_name,
(double)(fop_hits / interval_sec));
}
@@ -937,7 +931,7 @@ io_stats_dump_global_to_json_logfp(xlator_t *this,
dict_foreach_inline(xattr, curr)
{
- ios_log(this, logfp, "\"%s.%s.%s.queue_size\": \"%d\",", key_prefix,
+ ios_log(this, logfp, "\"%s.%s.%s.queue_size\": %d,", key_prefix,
str_prefix, curr->key, data_to_int32(curr->value));
}
@@ -950,23 +944,23 @@ io_stats_dump_global_to_json_logfp(xlator_t *this,
}
if (interval == -1) {
- ios_log(this, logfp, "\"%s.%s.uptime\": \"%" PRId64 "\",", key_prefix,
- str_prefix, (uint64_t)(now->tv_sec - stats->started_at.tv_sec));
+ ios_log(this, logfp, "\"%s.%s.uptime\": %" PRIu64 ",", key_prefix,
+ str_prefix, (uint64_t)(now - stats->started_at));
ios_log(this, logfp,
- "\"%s.%s.bytes_read\": \""
- "%" GF_PRI_ATOMIC "\",",
+ "\"%s.%s.bytes_read\": "
+ "%" GF_PRI_ATOMIC ",",
key_prefix, str_prefix, GF_ATOMIC_GET(stats->data_read));
ios_log(this, logfp,
- "\"%s.%s.bytes_written\": \""
- "%" GF_PRI_ATOMIC "\"",
+ "\"%s.%s.bytes_written\": "
+ "%" GF_PRI_ATOMIC "",
key_prefix, str_prefix, GF_ATOMIC_GET(stats->data_written));
} else {
- ios_log(this, logfp, "\"%s.%s.sample_interval_sec\": \"%0.2lf\",",
+ ios_log(this, logfp, "\"%s.%s.sample_interval_sec\": %0.2lf,",
key_prefix, str_prefix, interval_sec);
- ios_log(this, logfp, "\"%s.%s.bytes_read_per_sec\": \"%0.2lf\",",
+ ios_log(this, logfp, "\"%s.%s.bytes_read_per_sec\": %0.2lf,",
key_prefix, str_prefix,
(double)(GF_ATOMIC_GET(stats->data_read) / interval_sec));
- ios_log(this, logfp, "\"%s.%s.bytes_written_per_sec\": \"%0.2lf\"",
+ ios_log(this, logfp, "\"%s.%s.bytes_written_per_sec\": %0.2lf",
key_prefix, str_prefix,
(double)(GF_ATOMIC_GET(stats->data_written) / interval_sec));
}
@@ -1203,14 +1197,14 @@ out:
int
io_stats_dump_global_to_logfp(xlator_t *this, struct ios_global_stats *stats,
- struct timeval *now, int interval, FILE *logfp)
+ time_t now, int interval, FILE *logfp)
{
int i = 0;
int per_line = 0;
int index = 0;
struct ios_stat_head *list_head = NULL;
struct ios_conf *conf = NULL;
- char timestr[256] = {
+ char timestr[GF_TIMESTR_SIZE] = {
0,
};
char str_header[128] = {0};
@@ -1226,8 +1220,8 @@ io_stats_dump_global_to_logfp(xlator_t *this, struct ios_global_stats *stats,
ios_log(this, logfp, "\n=== Cumulative stats ===");
else
ios_log(this, logfp, "\n=== Interval %d stats ===", interval);
- ios_log(this, logfp, " Duration : %" PRId64 " secs",
- (uint64_t)(now->tv_sec - stats->started_at.tv_sec));
+ ios_log(this, logfp, " Duration : %" PRIu64 " secs",
+ (uint64_t)(now - stats->started_at));
ios_log(this, logfp, " BytesRead : %" GF_PRI_ATOMIC,
GF_ATOMIC_GET(stats->data_read));
ios_log(this, logfp, " BytesWritten : %" GF_PRI_ATOMIC "\n",
@@ -1319,11 +1313,8 @@ io_stats_dump_global_to_logfp(xlator_t *this, struct ios_global_stats *stats,
if (interval == -1) {
LOCK(&conf->lock);
{
- gf_time_fmt(timestr, sizeof timestr,
- conf->cumulative.max_openfd_time.tv_sec, gf_timefmt_FT);
- snprintf(timestr + strlen(timestr),
- sizeof timestr - strlen(timestr), ".%" GF_PRI_SUSECONDS,
- conf->cumulative.max_openfd_time.tv_usec);
+ gf_time_fmt_tv(timestr, sizeof timestr,
+ &conf->cumulative.max_openfd_time, gf_timefmt_FT);
ios_log(this, logfp,
"Current open fd's: %" PRId64 " Max open fd's: %" PRId64
" time %s",
@@ -1375,10 +1366,10 @@ io_stats_dump_global_to_logfp(xlator_t *this, struct ios_global_stats *stats,
int
io_stats_dump_global_to_dict(xlator_t *this, struct ios_global_stats *stats,
- struct timeval *now, int interval, dict_t *dict)
+ time_t now, int interval, dict_t *dict)
{
int ret = 0;
- char key[256] = {0};
+ char key[64] = {0};
uint64_t sec = 0;
int i = 0;
uint64_t count = 0;
@@ -1401,7 +1392,7 @@ io_stats_dump_global_to_dict(xlator_t *this, struct ios_global_stats *stats,
interval);
snprintf(key, sizeof(key), "%d-duration", interval);
- sec = (uint64_t)(now->tv_sec - stats->started_at.tv_sec);
+ sec = now - stats->started_at;
ret = dict_set_uint64(dict, key, sec);
if (ret) {
gf_log(this->name, GF_LOG_ERROR,
@@ -1524,9 +1515,8 @@ out:
}
int
-io_stats_dump_global(xlator_t *this, struct ios_global_stats *stats,
- struct timeval *now, int interval,
- struct ios_dump_args *args)
+io_stats_dump_global(xlator_t *this, struct ios_global_stats *stats, time_t now,
+ int interval, struct ios_dump_args *args)
{
int ret = -1;
@@ -1584,24 +1574,24 @@ ios_dump_args_init(struct ios_dump_args *args, ios_dump_type_t type,
}
static void
-ios_global_stats_clear(struct ios_global_stats *stats, struct timeval *now)
+ios_global_stats_clear(struct ios_global_stats *stats, time_t now)
{
GF_ASSERT(stats);
GF_ASSERT(now);
memset(stats, 0, sizeof(*stats));
- stats->started_at = *now;
+ stats->started_at = now;
}
int
-io_stats_dump(xlator_t *this, struct ios_dump_args *args, gf1_cli_info_op op,
+io_stats_dump(xlator_t *this, struct ios_dump_args *args, ios_info_op_t op,
gf_boolean_t is_peek)
{
struct ios_conf *conf = NULL;
struct ios_global_stats cumulative = {};
struct ios_global_stats incremental = {};
int increment = 0;
- struct timeval now;
+ time_t now = 0;
GF_ASSERT(this);
GF_ASSERT(args);
@@ -1609,31 +1599,31 @@ io_stats_dump(xlator_t *this, struct ios_dump_args *args, gf1_cli_info_op op,
GF_ASSERT(args->type < IOS_DUMP_TYPE_MAX);
conf = this->private;
+ now = gf_time();
- gettimeofday(&now, NULL);
LOCK(&conf->lock);
{
- if (op == GF_CLI_INFO_ALL || op == GF_CLI_INFO_CUMULATIVE)
+ if (op == GF_IOS_INFO_ALL || op == GF_IOS_INFO_CUMULATIVE)
cumulative = conf->cumulative;
- if (op == GF_CLI_INFO_ALL || op == GF_CLI_INFO_INCREMENTAL) {
+ if (op == GF_IOS_INFO_ALL || op == GF_IOS_INFO_INCREMENTAL) {
incremental = conf->incremental;
increment = conf->increment;
if (!is_peek) {
increment = conf->increment++;
- ios_global_stats_clear(&conf->incremental, &now);
+ ios_global_stats_clear(&conf->incremental, now);
}
}
}
UNLOCK(&conf->lock);
- if (op == GF_CLI_INFO_ALL || op == GF_CLI_INFO_CUMULATIVE)
- io_stats_dump_global(this, &cumulative, &now, -1, args);
+ if (op == GF_IOS_INFO_ALL || op == GF_IOS_INFO_CUMULATIVE)
+ io_stats_dump_global(this, &cumulative, now, -1, args);
- if (op == GF_CLI_INFO_ALL || op == GF_CLI_INFO_INCREMENTAL)
- io_stats_dump_global(this, &incremental, &now, increment, args);
+ if (op == GF_IOS_INFO_ALL || op == GF_IOS_INFO_INCREMENTAL)
+ io_stats_dump_global(this, &incremental, now, increment, args);
return 0;
}
@@ -1643,9 +1633,8 @@ io_stats_dump_fd(xlator_t *this, struct ios_fd *iosfd)
{
struct ios_conf *conf = NULL;
struct timeval now;
- uint64_t sec = 0;
- uint64_t usec = 0;
int i = 0;
+ double usecs = 0;
uint64_t data_read = 0;
uint64_t data_written = 0;
uint64_t block_count_read = 0;
@@ -1660,23 +1649,15 @@ io_stats_dump_fd(xlator_t *this, struct ios_fd *iosfd)
return 0;
gettimeofday(&now, NULL);
-
- if (iosfd->opened_at.tv_usec > now.tv_usec) {
- now.tv_usec += 1000000;
- now.tv_usec--;
- }
-
- sec = now.tv_sec - iosfd->opened_at.tv_sec;
- usec = now.tv_usec - iosfd->opened_at.tv_usec;
+ usecs = gf_tvdiff(&iosfd->opened_at, &now);
gf_log(this->name, GF_LOG_INFO, "--- fd stats ---");
if (iosfd->filename)
gf_log(this->name, GF_LOG_INFO, " Filename : %s", iosfd->filename);
- if (sec)
- gf_log(this->name, GF_LOG_INFO,
- " Lifetime : %" PRId64 "secs, %" PRId64 "usecs", sec, usec);
+ if (usecs)
+ gf_log(this->name, GF_LOG_INFO, " Lifetime : %lf secs", usecs);
data_read = GF_ATOMIC_GET(iosfd->data_read);
if (data_read)
@@ -1779,9 +1760,7 @@ update_ios_latency(struct ios_conf *conf, call_frame_t *frame,
begin = &frame->begin;
end = &frame->end;
- elapsed = ((end->tv_sec - begin->tv_sec) * 1e9 +
- (end->tv_nsec - begin->tv_nsec)) /
- 1000;
+ elapsed = gf_tsdiff(begin, end) / 1000.0;
update_ios_latency_stats(&conf->cumulative, elapsed, op);
update_ios_latency_stats(&conf->incremental, elapsed, op);
@@ -1796,12 +1775,13 @@ io_stats_dump_stats_to_dict(xlator_t *this, dict_t *resp,
{
struct ios_conf *conf = NULL;
int cnt = 0;
- char key[256];
+ char key[32];
+ int keylen;
struct ios_stat_head *list_head = NULL;
struct ios_stat_list *entry = NULL;
int ret = -1;
ios_stats_thru_t index = IOS_STATS_THRU_MAX;
- char timestr[256] = {
+ char timestr[GF_TIMESTR_SIZE] = {
0,
};
char *dict_timestr = NULL;
@@ -1820,14 +1800,9 @@ io_stats_dump_stats_to_dict(xlator_t *this, dict_t *resp,
ret = dict_set_uint64(resp, "max-open",
conf->cumulative.max_nr_opens);
- gf_time_fmt(timestr, sizeof timestr,
- conf->cumulative.max_openfd_time.tv_sec,
- gf_timefmt_FT);
- if (conf->cumulative.max_openfd_time.tv_sec)
- snprintf(timestr + strlen(timestr),
- sizeof timestr - strlen(timestr),
- ".%" GF_PRI_SUSECONDS,
- conf->cumulative.max_openfd_time.tv_usec);
+ gf_time_fmt_tv(timestr, sizeof timestr,
+ &conf->cumulative.max_openfd_time,
+ gf_timefmt_FT);
dict_timestr = gf_strdup(timestr);
if (!dict_timestr)
@@ -1867,7 +1842,7 @@ io_stats_dump_stats_to_dict(xlator_t *this, dict_t *resp,
default:
goto out;
}
- ret = dict_set_int32(resp, "top-op", flags);
+ ret = dict_set_int32_sizen(resp, "top-op", flags);
if (!list_cnt)
goto out;
LOCK(&list_head->lock);
@@ -1875,24 +1850,24 @@ io_stats_dump_stats_to_dict(xlator_t *this, dict_t *resp,
list_for_each_entry(entry, &list_head->iosstats->list, list)
{
cnt++;
- snprintf(key, 256, "%s-%d", "filename", cnt);
- ret = dict_set_str(resp, key, entry->iosstat->filename);
+ keylen = snprintf(key, sizeof(key), "filename-%d", cnt);
+ ret = dict_set_strn(resp, key, keylen, entry->iosstat->filename);
if (ret)
goto unlock_list_head;
- snprintf(key, 256, "%s-%d", "value", cnt);
+ snprintf(key, sizeof(key), "value-%d", cnt);
ret = dict_set_uint64(resp, key, entry->value);
if (ret)
goto unlock_list_head;
if (index != IOS_STATS_THRU_MAX) {
- snprintf(key, 256, "%s-%d", "time-sec", cnt);
- ret = dict_set_int32(
- resp, key,
+ keylen = snprintf(key, sizeof(key), "time-sec-%d", cnt);
+ ret = dict_set_int32n(
+ resp, key, keylen,
entry->iosstat->thru_counters[index].time.tv_sec);
if (ret)
goto unlock_list_head;
- snprintf(key, 256, "%s-%d", "time-usec", cnt);
- ret = dict_set_int32(
- resp, key,
+ keylen = snprintf(key, sizeof(key), "time-usec-%d", cnt);
+ ret = dict_set_int32n(
+ resp, key, keylen,
entry->iosstat->thru_counters[index].time.tv_usec);
if (ret)
goto unlock_list_head;
@@ -1907,7 +1882,7 @@ unlock_list_head:
* failed. */
if (ret)
goto out;
- ret = dict_set_int32(resp, "members", cnt);
+ ret = dict_set_int32_sizen(resp, "members", cnt);
out:
return ret;
}
@@ -2123,6 +2098,19 @@ io_stats_writev_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
}
int
+io_stats_copy_file_range_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *stbuf, struct iatt *prebuf_dst,
+ struct iatt *postbuf_dst, dict_t *xdata)
+{
+ UPDATE_PROFILE_STATS(frame, COPY_FILE_RANGE);
+
+ STACK_UNWIND_STRICT(copy_file_range, frame, op_ret, op_errno, stbuf,
+ prebuf_dst, postbuf_dst, xdata);
+ return 0;
+}
+
+int
io_stats_readdirp_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, gf_dirent_t *buf,
dict_t *xdata)
@@ -2877,6 +2865,19 @@ io_stats_writev(call_frame_t *frame, xlator_t *this, fd_t *fd,
}
int
+io_stats_copy_file_range(call_frame_t *frame, xlator_t *this, fd_t *fd_in,
+ off_t off_in, fd_t *fd_out, off_t off_out, size_t len,
+ uint32_t flags, dict_t *xdata)
+{
+ START_FOP_LATENCY(frame);
+
+ STACK_WIND(frame, io_stats_copy_file_range_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->copy_file_range, fd_in, off_in, fd_out,
+ off_out, len, flags, xdata);
+ return 0;
+}
+
+int
io_stats_statfs(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
{
START_FOP_LATENCY(frame);
@@ -3009,7 +3010,7 @@ conditional_dump(dict_t *dict, char *key, data_t *value, void *data)
} else {
(void)ios_dump_args_init(&args, IOS_DUMP_TYPE_FILE, logfp);
}
- io_stats_dump(this, &args, GF_CLI_INFO_ALL, _gf_false);
+ io_stats_dump(this, &args, GF_IOS_INFO_ALL, _gf_false);
fclose(logfp);
return 0;
}
@@ -3112,7 +3113,7 @@ _ios_dump_thread(xlator_t *this)
stats_logfp = fopen(stats_filename, "w+");
if (stats_logfp) {
(void)ios_dump_args_init(&args, conf->dump_format, stats_logfp);
- io_stats_dump(this, &args, GF_CLI_INFO_ALL, _gf_false);
+ io_stats_dump(this, &args, GF_IOS_INFO_ALL, _gf_false);
fclose(stats_logfp);
log_stats_fopen_failure = _gf_true;
} else if (log_stats_fopen_failure) {
@@ -3450,12 +3451,13 @@ io_stats_release(xlator_t *this, fd_t *fd)
BUMP_FOP(RELEASE);
conf = this->private;
-
- LOCK(&conf->lock);
- {
- conf->cumulative.nr_opens--;
+ if (conf) {
+ LOCK(&conf->lock);
+ {
+ conf->cumulative.nr_opens--;
+ }
+ UNLOCK(&conf->lock);
}
- UNLOCK(&conf->lock);
ios_fd_ctx_get(fd, this, &iosfd);
if (iosfd) {
@@ -3572,26 +3574,21 @@ ios_destroy_top_stats(struct ios_conf *conf)
return;
}
-static int
+static void
io_stats_clear(struct ios_conf *conf)
{
- struct timeval now;
- int ret = -1;
+ time_t now = 0;
GF_ASSERT(conf);
+ now = gf_time();
- if (!gettimeofday(&now, NULL)) {
- LOCK(&conf->lock);
- {
- ios_global_stats_clear(&conf->cumulative, &now);
- ios_global_stats_clear(&conf->incremental, &now);
- conf->increment = 0;
- }
- UNLOCK(&conf->lock);
- ret = 0;
+ LOCK(&conf->lock);
+ {
+ ios_global_stats_clear(&conf->cumulative, now);
+ ios_global_stats_clear(&conf->incremental, now);
+ conf->increment = 0;
}
-
- return ret;
+ UNLOCK(&conf->lock);
}
int32_t
@@ -3657,15 +3654,15 @@ io_priv(xlator_t *this)
}
static void
-ios_set_log_format_code(struct ios_conf *conf)
+ios_set_log_format_code(struct ios_conf *conf, char *dump_format_str)
{
- if (strcmp(conf->dump_format_str, "json") == 0)
+ if (strcmp(dump_format_str, "json") == 0)
conf->dump_format = IOS_DUMP_TYPE_JSON_FILE;
- else if (strcmp(conf->dump_format_str, "text") == 0)
+ else if (strcmp(dump_format_str, "text") == 0)
conf->dump_format = IOS_DUMP_TYPE_FILE;
- else if (strcmp(conf->dump_format_str, "dict") == 0)
+ else if (strcmp(dump_format_str, "dict") == 0)
conf->dump_format = IOS_DUMP_TYPE_DICT;
- else if (strcmp(conf->dump_format_str, "samples") == 0)
+ else if (strcmp(dump_format_str, "samples") == 0)
conf->dump_format = IOS_DUMP_TYPE_SAMPLES;
}
@@ -3682,19 +3679,23 @@ xlator_set_loglevel(xlator_t *this, int log_level)
active = ctx->active;
top = active->first;
- if (strcmp(top->type, "protocol/server") || (log_level == -1))
+ if (log_level == -1)
return;
- /* Set log-level for server xlator */
- top->loglevel = log_level;
+ if (ctx->cmd_args.brick_mux) {
+ /* Set log-level for all brick xlators */
+ top->loglevel = log_level;
- /* Set log-level for parent xlator */
- if (this->parents)
- this->parents->xlator->loglevel = log_level;
+ /* Set log-level for parent xlator */
+ if (this->parents)
+ this->parents->xlator->loglevel = log_level;
- while (trav) {
- trav->loglevel = log_level;
- trav = trav->next;
+ while (trav) {
+ trav->loglevel = log_level;
+ trav = trav->next;
+ }
+ } else {
+ gf_log_set_loglevel(this->ctx, log_level);
}
}
@@ -3706,6 +3707,7 @@ reconfigure(xlator_t *this, dict_t *options)
char *sys_log_str = NULL;
char *log_format_str = NULL;
char *logger_str = NULL;
+ char *dump_format_str = NULL;
int sys_log_level = -1;
char *log_str = NULL;
int log_level = -1;
@@ -3714,6 +3716,7 @@ reconfigure(xlator_t *this, dict_t *options)
uint32_t log_buf_size = 0;
uint32_t log_flush_timeout = 0;
int32_t old_dump_interval;
+ int32_t threads;
if (!this || !this->private)
goto out;
@@ -3750,9 +3753,8 @@ reconfigure(xlator_t *this, dict_t *options)
GF_OPTION_RECONF("ios-sample-interval", conf->ios_sample_interval, options,
int32, out);
- GF_OPTION_RECONF("ios-dump-format", conf->dump_format_str, options, str,
- out);
- ios_set_log_format_code(conf);
+ GF_OPTION_RECONF("ios-dump-format", dump_format_str, options, str, out);
+ ios_set_log_format_code(conf, dump_format_str);
GF_OPTION_RECONF("ios-sample-buf-size", conf->ios_sample_buf_size, options,
int32, out);
GF_OPTION_RECONF("sys-log-level", sys_log_str, options, str, out);
@@ -3787,6 +3789,9 @@ reconfigure(xlator_t *this, dict_t *options)
out);
gf_log_set_log_flush_timeout(log_flush_timeout);
+ GF_OPTION_RECONF("threads", threads, options, int32, out);
+ gf_async_adjust_threads(threads);
+
ret = 0;
out:
gf_log(this ? this->name : "io-stats", GF_LOG_DEBUG,
@@ -3824,7 +3829,7 @@ ios_conf_destroy(struct ios_conf *conf)
_ios_destroy_dump_thread(conf);
ios_destroy_sample_buf(conf->ios_sample_buf);
LOCK_DESTROY(&conf->lock);
- GF_FREE(conf->dnscache);
+ gf_dnscache_deinit(conf->dnscache);
GF_FREE(conf);
}
@@ -3847,16 +3852,18 @@ ios_init_stats(struct ios_global_stats *stats)
for (i = 0; i < GF_UPCALL_FLAGS_MAXVALUE; i++)
GF_ATOMIC_INIT(stats->upcall_hits[i], 0);
- gettimeofday(&stats->started_at, NULL);
+ stats->started_at = gf_time();
}
int
init(xlator_t *this)
{
struct ios_conf *conf = NULL;
+ char *volume_id = NULL;
char *sys_log_str = NULL;
char *logger_str = NULL;
char *log_format_str = NULL;
+ char *dump_format_str = NULL;
int logger = -1;
int log_format = -1;
int sys_log_level = -1;
@@ -3865,6 +3872,7 @@ init(xlator_t *this)
int ret = -1;
uint32_t log_buf_size = 0;
uint32_t log_flush_timeout = 0;
+ int32_t threads;
if (!this)
return -1;
@@ -3892,6 +3900,11 @@ init(xlator_t *this)
conf->unique_id = this->name;
}
+ ret = dict_get_strn(this->options, "volume-id", SLEN("volume-id"),
+ &volume_id);
+ if (!ret) {
+ strncpy(this->graph->volume_id, volume_id, GF_UUID_BUF_SIZE);
+ }
/*
* Init it just after calloc, so that we are sure the lock is inited
* in case of error paths.
@@ -3917,8 +3930,8 @@ init(xlator_t *this)
GF_OPTION_INIT("ios-sample-interval", conf->ios_sample_interval, int32,
out);
- GF_OPTION_INIT("ios-dump-format", conf->dump_format_str, str, out);
- ios_set_log_format_code(conf);
+ GF_OPTION_INIT("ios-dump-format", dump_format_str, str, out);
+ ios_set_log_format_code(conf, dump_format_str);
GF_OPTION_INIT("ios-sample-buf-size", conf->ios_sample_buf_size, int32,
out);
@@ -3932,6 +3945,10 @@ init(xlator_t *this)
GF_OPTION_INIT("ios-dnscache-ttl-sec", conf->ios_dnscache_ttl_sec, int32,
out);
conf->dnscache = gf_dnscache_init(conf->ios_dnscache_ttl_sec);
+ if (!conf->dnscache) {
+ ret = -1;
+ goto out;
+ }
GF_OPTION_INIT("sys-log-level", sys_log_str, str, out);
if (sys_log_str) {
@@ -3964,6 +3981,9 @@ init(xlator_t *this)
GF_OPTION_INIT("log-flush-timeout", log_flush_timeout, time, out);
gf_log_set_log_flush_timeout(log_flush_timeout);
+ GF_OPTION_INIT("threads", threads, int32, out);
+ gf_async_adjust_threads(threads);
+
this->private = conf;
if (conf->ios_dump_interval > 0) {
conf->dump_thread_running = _gf_true;
@@ -4068,8 +4088,8 @@ notify(xlator_t *this, int32_t event, void *data, ...)
}
} else {
ret = dict_get_int32(dict, "info-op", &op);
- if (ret || op < GF_CLI_INFO_ALL || GF_CLI_INFO_CLEAR < op)
- op = GF_CLI_INFO_ALL;
+ if (ret || op < GF_IOS_INFO_ALL || GF_IOS_INFO_CLEAR < op)
+ op = GF_IOS_INFO_ALL;
ret = dict_set_int32(output, "info-op", op);
if (ret) {
@@ -4078,13 +4098,10 @@ notify(xlator_t *this, int32_t event, void *data, ...)
goto out;
}
- if (GF_CLI_INFO_CLEAR == op) {
- ret = io_stats_clear(this->private);
- if (ret)
- gf_log(this->name, GF_LOG_ERROR,
- "Failed to clear info stats");
+ if (GF_IOS_INFO_CLEAR == op) {
+ io_stats_clear(this->private);
- ret = dict_set_int32(output, "stats-cleared", ret ? 0 : 1);
+ ret = dict_set_int32(output, "stats-cleared", 1);
if (ret)
gf_log(this->name, GF_LOG_ERROR,
"Failed to set stats-cleared"
@@ -4192,6 +4209,7 @@ struct xlator_fops fops = {
.getactivelk = io_stats_getactivelk,
.setactivelk = io_stats_setactivelk,
.compound = io_stats_compound,
+ .copy_file_range = io_stats_copy_file_range,
};
struct xlator_cbks cbks = {
@@ -4406,6 +4424,57 @@ struct volume_options options[] = {
.type = GF_OPTION_TYPE_STR,
.default_value = "/no/such/path",
.description = "Unique ID for our files."},
+ {.key = {"global-threading"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ .op_version = {GD_OP_VERSION_6_0},
+ .flags = OPT_FLAG_SETTABLE,
+ .tags = {"io-stats", "threading"},
+ .description = "This option enables the global threading support for "
+ "bricks. If enabled, it's recommended to also enable "
+ "'performance.iot-pass-through'"},
+ {.key = {"threads"}, .type = GF_OPTION_TYPE_INT},
+ {.key = {"brick-threads"},
+ .type = GF_OPTION_TYPE_INT,
+ .default_value = "16",
+ .min = 0,
+ .max = GF_ASYNC_MAX_THREADS,
+ .op_version = {GD_OP_VERSION_6_0},
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .tags = {"io-stats", "threading"},
+ .description = "When global threading is used, this value determines the "
+ "maximum amount of threads that can be created on bricks"},
+ {.key = {"client-threads"},
+ .type = GF_OPTION_TYPE_INT,
+ .default_value = "16",
+ .min = 0,
+ .max = GF_ASYNC_MAX_THREADS,
+ .op_version = {GD_OP_VERSION_6_0},
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC | OPT_FLAG_CLIENT_OPT,
+ .tags = {"io-stats", "threading"},
+ .description = "When global threading is used, this value determines the "
+ "maximum amount of threads that can be created on clients"},
+ {.key = {"volume-id"},
+ .type = GF_OPTION_TYPE_STR,
+ .op_version = {GD_OP_VERSION_7_1},
+ .tags = {"global", "volume-id"},
+ .description =
+ "This option points to the 'unique' UUID particular to this "
+ "volume, which would be set in 'graph->volume_id'"},
{.key = {NULL}},
+};
+xlator_api_t xlator_api = {
+ .init = init,
+ .fini = fini,
+ .notify = notify,
+ .reconfigure = reconfigure,
+ .mem_acct_init = mem_acct_init,
+ .op_version = {1}, /* Present from the initial version */
+ .dumpops = &dumpops,
+ .fops = &fops,
+ .cbks = &cbks,
+ .options = options,
+ .identifier = "io-stats",
+ .category = GF_MAINTAINED,
};
diff --git a/xlators/debug/sink/src/sink.c b/xlators/debug/sink/src/sink.c
index fbbdd3a4847..9822bbb732e 100644
--- a/xlators/debug/sink/src/sink.c
+++ b/xlators/debug/sink/src/sink.c
@@ -8,8 +8,8 @@
cases as published by the Free Software Foundation.
*/
-#include "xlator.h"
-#include "defaults.h"
+#include <glusterfs/xlator.h>
+#include <glusterfs/defaults.h>
int32_t
init(xlator_t *this)
@@ -80,3 +80,15 @@ struct xlator_cbks cbks = {};
struct volume_options options[] = {
{.key = {NULL}},
};
+
+xlator_api_t xlator_api = {
+ .init = init,
+ .fini = fini,
+ .notify = notify,
+ .op_version = {GD_OP_VERSION_3_12_0},
+ .fops = &fops,
+ .cbks = &cbks,
+ .options = options,
+ .identifier = "sink",
+ .category = GF_TECH_PREVIEW,
+};
diff --git a/xlators/debug/trace/src/trace-mem-types.h b/xlators/debug/trace/src/trace-mem-types.h
index cf05a77b9f1..18a7e0414a6 100644
--- a/xlators/debug/trace/src/trace-mem-types.h
+++ b/xlators/debug/trace/src/trace-mem-types.h
@@ -11,7 +11,7 @@
#ifndef __TRACE_MEM_TYPES_H__
#define __TRACE_MEM_TYPES_H__
-#include "mem-types.h"
+#include <glusterfs/mem-types.h>
enum gf_trace_mem_types_ {
gf_trace_mt_trace_conf_t = gf_common_mt_end + 1,
diff --git a/xlators/debug/trace/src/trace.c b/xlators/debug/trace/src/trace.c
index 0aca3a9a5bb..6ed0ca00342 100644
--- a/xlators/debug/trace/src/trace.c
+++ b/xlators/debug/trace/src/trace.c
@@ -22,13 +22,13 @@
static void
trace_stat_to_str(struct iatt *buf, char *str, size_t len)
{
- char atime_buf[200] = {
+ char atime_buf[GF_TIMESTR_SIZE] = {
0,
};
- char mtime_buf[200] = {
+ char mtime_buf[GF_TIMESTR_SIZE] = {
0,
};
- char ctime_buf[200] = {
+ char ctime_buf[GF_TIMESTR_SIZE] = {
0,
};
@@ -64,7 +64,7 @@ trace_stat_to_str(struct iatt *buf, char *str, size_t len)
int
dump_history_trace(circular_buffer_t *cb, void *data)
{
- char timestr[256] = {
+ char timestr[GF_TIMESTR_SIZE] = {
0,
};
@@ -72,9 +72,7 @@ dump_history_trace(circular_buffer_t *cb, void *data)
gettimeofday () fails, it's safe to check tm and then dump the time
at which the entry was added to the buffer */
- gf_time_fmt(timestr, sizeof timestr, cb->tv.tv_sec, gf_timefmt_Ymd_T);
- snprintf(timestr + strlen(timestr), 256 - strlen(timestr),
- ".%" GF_PRI_SUSECONDS, cb->tv.tv_usec);
+ gf_time_fmt_tv(timestr, sizeof timestr, &cb->tv, gf_timefmt_Ymd_T);
gf_proc_dump_write("TIME", "%s", timestr);
gf_proc_dump_write("FOP", "%s\n", (char *)cb->data);
@@ -2209,10 +2207,10 @@ int
trace_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
struct iatt *stbuf, int32_t valid, dict_t *xdata)
{
- char actime_str[256] = {
+ char actime_str[GF_TIMESTR_SIZE] = {
0,
};
- char modtime_str[256] = {
+ char modtime_str[GF_TIMESTR_SIZE] = {
0,
};
trace_conf_t *conf = NULL;
@@ -2278,10 +2276,10 @@ int
trace_fsetattr(call_frame_t *frame, xlator_t *this, fd_t *fd,
struct iatt *stbuf, int32_t valid, dict_t *xdata)
{
- char actime_str[256] = {
+ char actime_str[GF_TIMESTR_SIZE] = {
0,
};
- char modtime_str[256] = {
+ char modtime_str[GF_TIMESTR_SIZE] = {
0,
};
trace_conf_t *conf = NULL;
@@ -3520,3 +3518,17 @@ struct volume_options options[] = {
};
struct xlator_dumpops dumpops = {.history = trace_dump_history};
+
+xlator_api_t xlator_api = {
+ .init = init,
+ .fini = fini,
+ .reconfigure = reconfigure,
+ .mem_acct_init = mem_acct_init,
+ .op_version = {1},
+ .dumpops = &dumpops,
+ .fops = &fops,
+ .cbks = &cbks,
+ .options = options,
+ .identifier = "trace",
+ .category = GF_TECH_PREVIEW,
+};
diff --git a/xlators/debug/trace/src/trace.h b/xlators/debug/trace/src/trace.h
index cd73e0f34ed..b16304799da 100644
--- a/xlators/debug/trace/src/trace.h
+++ b/xlators/debug/trace/src/trace.h
@@ -10,14 +10,14 @@
#include <time.h>
#include <errno.h>
-#include "glusterfs.h"
-#include "xlator.h"
-#include "common-utils.h"
-#include "event-history.h"
-#include "logging.h"
-#include "circ-buff.h"
-#include "statedump.h"
-#include "options.h"
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/xlator.h>
+#include <glusterfs/common-utils.h>
+#include <glusterfs/event-history.h>
+#include <glusterfs/logging.h>
+#include <glusterfs/circ-buff.h>
+#include <glusterfs/statedump.h>
+#include <glusterfs/options.h>
#define TRACE_DEFAULT_HISTORY_SIZE 1024
diff --git a/xlators/encryption/Makefile.am b/xlators/encryption/Makefile.am
deleted file mode 100644
index 36efc6698bd..00000000000
--- a/xlators/encryption/Makefile.am
+++ /dev/null
@@ -1,3 +0,0 @@
-SUBDIRS = rot-13 crypt
-
-CLEANFILES =
diff --git a/xlators/encryption/crypt/Makefile.am b/xlators/encryption/crypt/Makefile.am
deleted file mode 100644
index d471a3f9243..00000000000
--- a/xlators/encryption/crypt/Makefile.am
+++ /dev/null
@@ -1,3 +0,0 @@
-SUBDIRS = src
-
-CLEANFILES =
diff --git a/xlators/encryption/crypt/src/Makefile.am b/xlators/encryption/crypt/src/Makefile.am
deleted file mode 100644
index 05fd3d5096b..00000000000
--- a/xlators/encryption/crypt/src/Makefile.am
+++ /dev/null
@@ -1,26 +0,0 @@
-if ENABLE_CRYPT_XLATOR
-
-xlator_LTLIBRARIES = crypt.la
-xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/encryption
-
-crypt_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS)
-
-crypt_la_SOURCES = keys.c data.c metadata.c atom.c crypt.c
-crypt_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la \
- -lssl -lcrypto
-
-noinst_HEADERS = crypt-common.h crypt-mem-types.h crypt.h metadata.h
-
-AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \
- -I$(top_srcdir)/rpc/xdr/src -I$(top_builddir)/rpc/xdr/src
-
-AM_CFLAGS = -Wall $(GF_CFLAGS)
-
-CLEANFILES =
-
-else
-
-noinst_DIST = keys.c data.c metadata.c atom.c crypt.c
-noinst_HEADERS = crypt-common.h crypt-mem-types.h crypt.h metadata.h
-
-endif
diff --git a/xlators/encryption/crypt/src/atom.c b/xlators/encryption/crypt/src/atom.c
deleted file mode 100644
index 8e9c4940abd..00000000000
--- a/xlators/encryption/crypt/src/atom.c
+++ /dev/null
@@ -1,861 +0,0 @@
-/*
- Copyright (c) 2008-2013 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-#include "defaults.h"
-#include "crypt-common.h"
-#include "crypt.h"
-
-/*
- * Glossary
- *
- *
- * cblock (or cipher block). A logical unit in a file.
- * cblock size is defined as the number of bits
- * in an input (or output) block of the block
- * cipher (*). Cipher block size is a property of
- * cipher algorithm. E.g. cblock size is 64 bits
- * for DES, 128 bits for AES, etc.
- *
- * atomic cipher A cipher algorithm, which requires some chunks of
- * algorithm text to be padded at left and(or) right sides before
- * cipher transaform.
- *
- *
- * block (atom) Minimal chunk of file's data, which doesn't require
- * padding. We'll consider logical units in a file of
- * block size (atom size).
- *
- * cipher algorithm Atomic cipher algorithm, which requires the last
- * with EOF issue incomplete cblock in a file to be padded with some
- * data (usually zeros).
- *
- *
- * operation, which reading/writing from offset, which is not aligned to
- * forms a gap at to atom size
- * the beginning
- *
- *
- * operation, which reading/writing count bytes starting from offset off,
- * forms a gap at so that off+count is not aligned to atom_size
- * the end
- *
- * head block the first atom affected by an operation, which forms
- * a gap at the beginning, or(and) at the end.
- * Сomment. Head block has at least one gap (either at
- * the beginning, or at the end)
- *
- *
- * tail block the last atom different from head, affected by an
- * operation, which forms a gap at the end.
- * Сomment: Tail block has exactly one gap (at the end).
- *
- *
- * partial block head or tail block
- *
- *
- * full block block without gaps.
- *
- *
- * (*) Recommendation for Block Cipher Modes of Operation
- * Methods and Techniques
- * NIST Special Publication 800-38A Edition 2001
- */
-
-/*
- * atom->offset_at()
- */
-static off_t
-offset_at_head(struct avec_config *conf)
-{
- return conf->aligned_offset;
-}
-
-static off_t
-offset_at_hole_head(call_frame_t *frame, struct object_cipher_info *object)
-{
- return offset_at_head(get_hole_conf(frame));
-}
-
-static off_t
-offset_at_data_head(call_frame_t *frame, struct object_cipher_info *object)
-{
- return offset_at_head(get_data_conf(frame));
-}
-
-static off_t
-offset_at_tail(struct avec_config *conf, struct object_cipher_info *object)
-{
- return conf->aligned_offset +
- (conf->off_in_head ? get_atom_size(object) : 0) +
- (conf->nr_full_blocks << get_atom_bits(object));
-}
-
-static off_t
-offset_at_hole_tail(call_frame_t *frame, struct object_cipher_info *object)
-{
- return offset_at_tail(get_hole_conf(frame), object);
-}
-
-static off_t
-offset_at_data_tail(call_frame_t *frame, struct object_cipher_info *object)
-{
- return offset_at_tail(get_data_conf(frame), object);
-}
-
-static off_t
-offset_at_full(struct avec_config *conf, struct object_cipher_info *object)
-{
- return conf->aligned_offset +
- (conf->off_in_head ? get_atom_size(object) : 0);
-}
-
-static off_t
-offset_at_data_full(call_frame_t *frame, struct object_cipher_info *object)
-{
- return offset_at_full(get_data_conf(frame), object);
-}
-
-static off_t
-offset_at_hole_full(call_frame_t *frame, struct object_cipher_info *object)
-{
- return offset_at_full(get_hole_conf(frame), object);
-}
-
-/*
- * atom->io_size_nopad()
- */
-
-static uint32_t
-io_size_nopad_head(struct avec_config *conf, struct object_cipher_info *object)
-{
- uint32_t gap_at_beg;
- uint32_t gap_at_end;
-
- check_head_block(conf);
-
- gap_at_beg = conf->off_in_head;
-
- if (has_tail_block(conf) || has_full_blocks(conf) || conf->off_in_tail == 0)
- gap_at_end = 0;
- else
- gap_at_end = get_atom_size(object) - conf->off_in_tail;
-
- return get_atom_size(object) - (gap_at_beg + gap_at_end);
-}
-
-static uint32_t
-io_size_nopad_tail(struct avec_config *conf, struct object_cipher_info *object)
-{
- check_tail_block(conf);
- return conf->off_in_tail;
-}
-
-static uint32_t
-io_size_nopad_full(struct avec_config *conf, struct object_cipher_info *object)
-{
- check_full_block(conf);
- return get_atom_size(object);
-}
-
-static uint32_t
-io_size_nopad_data_head(call_frame_t *frame, struct object_cipher_info *object)
-{
- return io_size_nopad_head(get_data_conf(frame), object);
-}
-
-static uint32_t
-io_size_nopad_hole_head(call_frame_t *frame, struct object_cipher_info *object)
-{
- return io_size_nopad_head(get_hole_conf(frame), object);
-}
-
-static uint32_t
-io_size_nopad_data_tail(call_frame_t *frame, struct object_cipher_info *object)
-{
- return io_size_nopad_tail(get_data_conf(frame), object);
-}
-
-static uint32_t
-io_size_nopad_hole_tail(call_frame_t *frame, struct object_cipher_info *object)
-{
- return io_size_nopad_tail(get_hole_conf(frame), object);
-}
-
-static uint32_t
-io_size_nopad_data_full(call_frame_t *frame, struct object_cipher_info *object)
-{
- return io_size_nopad_full(get_data_conf(frame), object);
-}
-
-static uint32_t
-io_size_nopad_hole_full(call_frame_t *frame, struct object_cipher_info *object)
-{
- return io_size_nopad_full(get_hole_conf(frame), object);
-}
-
-static uint32_t
-offset_in_head(struct avec_config *conf)
-{
- check_cursor_head(conf);
-
- return conf->off_in_head;
-}
-
-static uint32_t
-offset_in_tail(call_frame_t *frame, struct object_cipher_info *object)
-{
- return 0;
-}
-
-static uint32_t
-offset_in_full(struct avec_config *conf, struct object_cipher_info *object)
-{
- check_cursor_full(conf);
-
- if (has_head_block(conf))
- return (conf->cursor - 1) << get_atom_bits(object);
- else
- return conf->cursor << get_atom_bits(object);
-}
-
-static uint32_t
-offset_in_data_head(call_frame_t *frame, struct object_cipher_info *object)
-{
- return offset_in_head(get_data_conf(frame));
-}
-
-static uint32_t
-offset_in_hole_head(call_frame_t *frame, struct object_cipher_info *object)
-{
- return offset_in_head(get_hole_conf(frame));
-}
-
-static uint32_t
-offset_in_data_full(call_frame_t *frame, struct object_cipher_info *object)
-{
- return offset_in_full(get_data_conf(frame), object);
-}
-
-static uint32_t
-offset_in_hole_full(call_frame_t *frame, struct object_cipher_info *object)
-{
- return offset_in_full(get_hole_conf(frame), object);
-}
-
-/*
- * atom->rmw()
- */
-/*
- * Pre-conditions:
- * @vec contains plain text of the latest
- * version.
- *
- * Uptodate gaps of the @partial block with
- * this plain text, encrypt the whole block
- * and write the result to disk.
- */
-static int32_t
-rmw_partial_block(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iovec *vec,
- int32_t count, struct iatt *stbuf, struct iobref *iobref,
- struct rmw_atom *atom)
-{
- size_t was_read = 0;
- uint64_t file_size;
- crypt_local_t *local = frame->local;
- struct object_cipher_info *object = &local->info->cinfo;
-
- struct iovec *partial = atom->get_iovec(frame, 0);
- struct avec_config *conf = atom->get_config(frame);
- end_writeback_handler_t end_writeback_partial_block;
-#if DEBUG_CRYPT
- gf_boolean_t check_last_cblock = _gf_false;
-#endif
- local->op_ret = op_ret;
- local->op_errno = op_errno;
-
- if (op_ret < 0)
- goto exit;
-
- file_size = local->cur_file_size;
- was_read = op_ret;
-
- if (atom->locality == HEAD_ATOM && conf->off_in_head) {
- /*
- * head atom with a non-uptodate gap
- * at the beginning
- *
- * fill the gap with plain text of the
- * latest version. Convert a part of hole
- * (if any) to zeros.
- */
- int32_t i;
- int32_t copied = 0;
- int32_t to_gap; /* amount of data needed to uptodate
- the gap at the beginning */
-#if 0
- int32_t hole = 0; /* The part of the hole which
- * got in the head block */
-#endif /* 0 */
- to_gap = conf->off_in_head;
-
- if (was_read < to_gap) {
- if (file_size > offset_at_head(conf) + was_read) {
- /*
- * It is impossible to uptodate
- * head block: too few bytes have
- * been read from disk, so that
- * partial write is impossible.
- *
- * It could happen because of many
- * reasons: IO errors, (meta)data
- * corruption in the local file system,
- * etc.
- */
- gf_log(this->name, GF_LOG_WARNING,
- "Can not uptodate a gap at the beginning");
- local->op_ret = -1;
- local->op_errno = EIO;
- goto exit;
- }
-#if 0
- hole = to_gap - was_read;
-#endif /* 0 */
- to_gap = was_read;
- }
- /*
- * uptodate the gap at the beginning
- */
- for (i = 0; i < count && copied < to_gap; i++) {
- int32_t to_copy;
-
- to_copy = vec[i].iov_len;
- if (to_copy > to_gap - copied)
- to_copy = to_gap - copied;
-
- memcpy(partial->iov_base, vec[i].iov_base, to_copy);
- copied += to_copy;
- }
-#if 0
- /*
- * If possible, convert part of the
- * hole, which got in the head block
- */
- ret = TRY_LOCK(&local->hole_lock);
- if (!ret) {
- if (local->hole_handled)
- /*
- * already converted by
- * crypt_writev_cbk()
- */
- UNLOCK(&local->hole_lock);
- else {
- /*
- * convert the part of the hole
- * which got in the head block
- * to zeros.
- *
- * Update the orig_offset to make
- * sure writev_cbk() won't care
- * about this part of the hole.
- *
- */
- memset(partial->iov_base + to_gap, 0, hole);
-
- conf->orig_offset -= hole;
- conf->orig_size += hole;
- UNLOCK(&local->hole_lock);
- }
- }
- else /*
- * conversion is being performed
- * by crypt_writev_cbk()
- */
- ;
-#endif /* 0 */
- }
- if (atom->locality == TAIL_ATOM ||
- (!has_tail_block(conf) && conf->off_in_tail)) {
- /*
- * tail atom, or head atom with a non-uptodate
- * gap at the end.
- *
- * fill the gap at the end of the block
- * with plain text of the latest version.
- * Pad the result, (if needed)
- */
- int32_t i;
- int32_t to_gap;
- int copied;
- off_t off_in_tail;
- int32_t to_copy;
-
- off_in_tail = conf->off_in_tail;
- to_gap = conf->gap_in_tail;
-
- if (to_gap && was_read < off_in_tail + to_gap) {
- /*
- * It is impossible to uptodate
- * the gap at the end: too few bytes
- * have been read from disk, so that
- * partial write is impossible.
- *
- * It could happen because of many
- * reasons: IO errors, (meta)data
- * corruption in the local file system,
- * etc.
- */
- gf_log(this->name, GF_LOG_WARNING,
- "Can not uptodate a gap at the end");
- local->op_ret = -1;
- local->op_errno = EIO;
- goto exit;
- }
- /*
- * uptodate the gap at the end
- */
- copied = 0;
- to_copy = to_gap;
- for (i = count - 1; i >= 0 && to_copy > 0; i--) {
- uint32_t from_vec, off_in_vec;
-
- off_in_vec = 0;
- from_vec = vec[i].iov_len;
- if (from_vec > to_copy) {
- off_in_vec = from_vec - to_copy;
- from_vec = to_copy;
- }
- memcpy(partial->iov_base + off_in_tail + to_gap - copied - from_vec,
- vec[i].iov_base + off_in_vec, from_vec);
-
- gf_log(
- this->name, GF_LOG_DEBUG,
- "uptodate %d bytes at tail. Offset at target(source): %d(%d)",
- (int)from_vec, (int)off_in_tail + to_gap - copied - from_vec,
- (int)off_in_vec);
-
- copied += from_vec;
- to_copy -= from_vec;
- }
- partial->iov_len = off_in_tail + to_gap;
-
- if (object_alg_should_pad(object)) {
- int32_t resid = 0;
- resid = partial->iov_len & (object_alg_blksize(object) - 1);
- if (resid) {
- /*
- * append a new EOF padding
- */
- local->eof_padding_size = object_alg_blksize(object) - resid;
-
- gf_log(this->name, GF_LOG_DEBUG, "set padding size %d",
- local->eof_padding_size);
-
- memset(partial->iov_base + partial->iov_len, 1,
- local->eof_padding_size);
- partial->iov_len += local->eof_padding_size;
-#if DEBUG_CRYPT
- gf_log(this->name, GF_LOG_DEBUG,
- "pad cblock with %d zeros:", local->eof_padding_size);
- dump_cblock(this, (unsigned char *)partial->iov_base +
- partial->iov_len -
- object_alg_blksize(object));
- check_last_cblock = _gf_true;
-#endif
- }
- }
- }
- /*
- * encrypt the whole block
- */
- encrypt_aligned_iov(object, partial, 1, atom->offset_at(frame, object));
-#if DEBUG_CRYPT
- if (check_last_cblock == _gf_true) {
- gf_log(this->name, GF_LOG_DEBUG, "encrypt last cblock with offset %llu",
- (unsigned long long)atom->offset_at(frame, object));
- dump_cblock(this, (unsigned char *)partial->iov_base +
- partial->iov_len - object_alg_blksize(object));
- }
-#endif
- set_local_io_params_writev(frame, object, atom,
- atom->offset_at(frame, object),
- iov_length(partial, 1));
- /*
- * write the whole block to disk
- */
- end_writeback_partial_block = dispatch_end_writeback(local->fop);
- conf->cursor++;
- STACK_WIND(frame, end_writeback_partial_block, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->writev, local->fd, partial, 1,
- atom->offset_at(frame, object), local->flags, local->iobref_data,
- local->xdata);
-
- gf_log("crypt", GF_LOG_DEBUG,
- "submit partial block: %d bytes from %d offset",
- (int)iov_length(partial, 1), (int)atom->offset_at(frame, object));
-exit:
- return 0;
-}
-
-/*
- * Perform a (read-)modify-write sequence.
- * This should be performed only after approval
- * of upper server-side manager, i.e. the caller
- * needs to make sure this is his turn to rmw.
- */
-void
-submit_partial(call_frame_t *frame, xlator_t *this, fd_t *fd,
- atom_locality_type ltype)
-{
- int32_t ret;
- dict_t *dict;
- struct rmw_atom *atom;
- crypt_local_t *local = frame->local;
- struct object_cipher_info *object = &local->info->cinfo;
-
- atom = atom_by_types(local->active_setup, ltype);
- /*
- * To perform the "read" component of the read-modify-write
- * sequence the crypt translator does stack_wind to itself.
- *
- * Pass current file size to crypt_readv()
- */
- dict = dict_new();
- if (!dict) {
- /*
- * FIXME: Handle the error
- */
- gf_log("crypt", GF_LOG_WARNING, "Can not alloc dict");
- return;
- }
- ret = dict_set(dict, FSIZE_XATTR_PREFIX,
- data_from_uint64(local->cur_file_size));
- if (ret) {
- /*
- * FIXME: Handle the error
- */
- dict_unref(dict);
- gf_log("crypt", GF_LOG_WARNING, "Can not set dict");
- goto exit;
- }
- STACK_WIND(frame, atom->rmw, this, this->fops->readv, /* crypt_readv */
- fd, atom->count_to_uptodate(frame, object), /* count */
- atom->offset_at(frame, object), /* offset to read from */
- 0, dict);
-exit:
- dict_unref(dict);
-}
-
-/*
- * submit blocks of FULL_ATOM type
- */
-void
-submit_full(call_frame_t *frame, xlator_t *this)
-{
- crypt_local_t *local = frame->local;
- struct object_cipher_info *object = &local->info->cinfo;
- struct rmw_atom *atom = atom_by_types(local->active_setup, FULL_ATOM);
- uint32_t count; /* total number of full blocks to submit */
- uint32_t granularity; /* number of blocks to submit in one iteration */
-
- uint64_t off_in_file; /* start offset in the file, bytes */
- uint32_t off_in_atom; /* start offset in the atom, blocks */
- uint32_t blocks_written = 0; /* blocks written for this submit */
-
- struct avec_config *conf = atom->get_config(frame);
- end_writeback_handler_t end_writeback_full_block;
- /*
- * Write full blocks by groups of granularity size.
- */
- end_writeback_full_block = dispatch_end_writeback(local->fop);
-
- if (is_ordered_mode(frame)) {
- uint32_t skip = has_head_block(conf) ? 1 : 0;
- count = 1;
- granularity = 1;
- /*
- * calculate start offset using cursor value;
- * here we should take into account head block,
- * which corresponds to cursor value 0.
- */
- off_in_file = atom->offset_at(frame, object) +
- ((conf->cursor - skip) << get_atom_bits(object));
- off_in_atom = conf->cursor - skip;
- } else {
- /*
- * in parallel mode
- */
- count = conf->nr_full_blocks;
- granularity = MAX_IOVEC;
- off_in_file = atom->offset_at(frame, object);
- off_in_atom = 0;
- }
- while (count) {
- uint32_t blocks_to_write = count;
-
- if (blocks_to_write > granularity)
- blocks_to_write = granularity;
- if (conf->type == HOLE_ATOM)
- /*
- * reset iovec before encryption
- */
- memset(atom->get_iovec(frame, 0)->iov_base, 0,
- get_atom_size(object));
- /*
- * encrypt the group
- */
- encrypt_aligned_iov(
- object, atom->get_iovec(frame, off_in_atom + blocks_written),
- blocks_to_write,
- off_in_file + (blocks_written << get_atom_bits(object)));
-
- set_local_io_params_writev(
- frame, object, atom,
- off_in_file + (blocks_written << get_atom_bits(object)),
- blocks_to_write << get_atom_bits(object));
-
- conf->cursor += blocks_to_write;
-
- STACK_WIND(frame, end_writeback_full_block, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->writev, local->fd,
- atom->get_iovec(frame, off_in_atom + blocks_written),
- blocks_to_write,
- off_in_file + (blocks_written << get_atom_bits(object)),
- local->flags,
- local->iobref_data ? local->iobref_data : local->iobref,
- local->xdata);
-
- gf_log("crypt", GF_LOG_DEBUG, "submit %d full blocks from %d offset",
- blocks_to_write,
- (int)(off_in_file + (blocks_written << get_atom_bits(object))));
-
- count -= blocks_to_write;
- blocks_written += blocks_to_write;
- }
- return;
-}
-
-static int32_t
-rmw_data_head(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
- int32_t op_errno, struct iovec *vec, int32_t count,
- struct iatt *stbuf, struct iobref *iobref, dict_t *xdata)
-{
- return rmw_partial_block(frame, cookie, this, op_ret, op_errno, vec, count,
- stbuf, iobref,
- atom_by_types(DATA_ATOM, HEAD_ATOM));
-}
-
-static int32_t
-rmw_data_tail(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
- int32_t op_errno, struct iovec *vec, int32_t count,
- struct iatt *stbuf, struct iobref *iobref, dict_t *xdata)
-{
- return rmw_partial_block(frame, cookie, this, op_ret, op_errno, vec, count,
- stbuf, iobref,
- atom_by_types(DATA_ATOM, TAIL_ATOM));
-}
-
-static int32_t
-rmw_hole_head(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
- int32_t op_errno, struct iovec *vec, int32_t count,
- struct iatt *stbuf, struct iobref *iobref, dict_t *xdata)
-{
- return rmw_partial_block(frame, cookie, this, op_ret, op_errno, vec, count,
- stbuf, iobref,
- atom_by_types(HOLE_ATOM, HEAD_ATOM));
-}
-
-static int32_t
-rmw_hole_tail(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
- int32_t op_errno, struct iovec *vec, int32_t count,
- struct iatt *stbuf, struct iobref *iobref, dict_t *xdata)
-{
- return rmw_partial_block(frame, cookie, this, op_ret, op_errno, vec, count,
- stbuf, iobref,
- atom_by_types(HOLE_ATOM, TAIL_ATOM));
-}
-
-/*
- * atom->count_to_uptodate()
- */
-static uint32_t
-count_to_uptodate_head(struct avec_config *conf,
- struct object_cipher_info *object)
-{
- if (conf->acount == 1 && conf->off_in_tail)
- return get_atom_size(object);
- else
- /* there is no need to read the whole head block */
- return conf->off_in_head;
-}
-
-static uint32_t
-count_to_uptodate_tail(struct avec_config *conf,
- struct object_cipher_info *object)
-{
- /* we need to read the whole tail block */
- return get_atom_size(object);
-}
-
-static uint32_t
-count_to_uptodate_data_head(call_frame_t *frame,
- struct object_cipher_info *object)
-{
- return count_to_uptodate_head(get_data_conf(frame), object);
-}
-
-static uint32_t
-count_to_uptodate_data_tail(call_frame_t *frame,
- struct object_cipher_info *object)
-{
- return count_to_uptodate_tail(get_data_conf(frame), object);
-}
-
-static uint32_t
-count_to_uptodate_hole_head(call_frame_t *frame,
- struct object_cipher_info *object)
-{
- return count_to_uptodate_head(get_hole_conf(frame), object);
-}
-
-static uint32_t
-count_to_uptodate_hole_tail(call_frame_t *frame,
- struct object_cipher_info *object)
-{
- return count_to_uptodate_tail(get_hole_conf(frame), object);
-}
-
-/* atom->get_config() */
-
-static struct avec_config *
-get_config_data(call_frame_t *frame)
-{
- return &((crypt_local_t *)frame->local)->data_conf;
-}
-
-static struct avec_config *
-get_config_hole(call_frame_t *frame)
-{
- return &((crypt_local_t *)frame->local)->hole_conf;
-}
-
-/*
- * atom->get_iovec()
- */
-static struct iovec *
-get_iovec_hole_head(call_frame_t *frame, uint32_t count)
-{
- struct avec_config *conf = get_hole_conf(frame);
-
- return conf->avec;
-}
-
-static struct iovec *
-get_iovec_hole_full(call_frame_t *frame, uint32_t count)
-{
- struct avec_config *conf = get_hole_conf(frame);
-
- return conf->avec + (conf->off_in_head ? 1 : 0);
-}
-
-static struct iovec *
-get_iovec_hole_tail(call_frame_t *frame, uint32_t count)
-{
- struct avec_config *conf = get_hole_conf(frame);
-
- return conf->avec + (conf->blocks_in_pool - 1);
-}
-
-static struct iovec *
-get_iovec_data_head(call_frame_t *frame, uint32_t count)
-{
- struct avec_config *conf = get_data_conf(frame);
-
- return conf->avec;
-}
-
-static struct iovec *
-get_iovec_data_full(call_frame_t *frame, uint32_t count)
-{
- struct avec_config *conf = get_data_conf(frame);
-
- return conf->avec + (conf->off_in_head ? 1 : 0) + count;
-}
-
-static struct iovec *
-get_iovec_data_tail(call_frame_t *frame, uint32_t count)
-{
- struct avec_config *conf = get_data_conf(frame);
-
- return conf->avec + (conf->off_in_head ? 1 : 0) + conf->nr_full_blocks;
-}
-
-static struct rmw_atom atoms[LAST_DATA_TYPE][LAST_LOCALITY_TYPE] = {
- [DATA_ATOM][HEAD_ATOM] = {.locality = HEAD_ATOM,
- .rmw = rmw_data_head,
- .offset_at = offset_at_data_head,
- .offset_in = offset_in_data_head,
- .get_iovec = get_iovec_data_head,
- .io_size_nopad = io_size_nopad_data_head,
- .count_to_uptodate = count_to_uptodate_data_head,
- .get_config = get_config_data},
- [DATA_ATOM][TAIL_ATOM] = {.locality = TAIL_ATOM,
- .rmw = rmw_data_tail,
- .offset_at = offset_at_data_tail,
- .offset_in = offset_in_tail,
- .get_iovec = get_iovec_data_tail,
- .io_size_nopad = io_size_nopad_data_tail,
- .count_to_uptodate = count_to_uptodate_data_tail,
- .get_config = get_config_data},
- [DATA_ATOM][FULL_ATOM] = {.locality = FULL_ATOM,
- .offset_at = offset_at_data_full,
- .offset_in = offset_in_data_full,
- .get_iovec = get_iovec_data_full,
- .io_size_nopad = io_size_nopad_data_full,
- .get_config = get_config_data},
- [HOLE_ATOM][HEAD_ATOM] = {.locality = HEAD_ATOM,
- .rmw = rmw_hole_head,
- .offset_at = offset_at_hole_head,
- .offset_in = offset_in_hole_head,
- .get_iovec = get_iovec_hole_head,
- .io_size_nopad = io_size_nopad_hole_head,
- .count_to_uptodate = count_to_uptodate_hole_head,
- .get_config = get_config_hole},
- [HOLE_ATOM][TAIL_ATOM] = {.locality = TAIL_ATOM,
- .rmw = rmw_hole_tail,
- .offset_at = offset_at_hole_tail,
- .offset_in = offset_in_tail,
- .get_iovec = get_iovec_hole_tail,
- .io_size_nopad = io_size_nopad_hole_tail,
- .count_to_uptodate = count_to_uptodate_hole_tail,
- .get_config = get_config_hole},
- [HOLE_ATOM][FULL_ATOM] = {.locality = FULL_ATOM,
- .offset_at = offset_at_hole_full,
- .offset_in = offset_in_hole_full,
- .get_iovec = get_iovec_hole_full,
- .io_size_nopad = io_size_nopad_hole_full,
- .get_config = get_config_hole}};
-
-struct rmw_atom *
-atom_by_types(atom_data_type data, atom_locality_type locality)
-{
- return &atoms[data][locality];
-}
-
-/*
- Local variables:
- c-indentation-style: "K&R"
- mode-name: "LC"
- c-basic-offset: 8
- tab-width: 8
- fill-column: 80
- scroll-step: 1
- End:
-*/
diff --git a/xlators/encryption/crypt/src/crypt-common.h b/xlators/encryption/crypt/src/crypt-common.h
deleted file mode 100644
index 123d5c2a631..00000000000
--- a/xlators/encryption/crypt/src/crypt-common.h
+++ /dev/null
@@ -1,133 +0,0 @@
-/*
- Copyright (c) 2008-2013 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-#ifndef __CRYPT_COMMON_H__
-#define __CRYPT_COMMON_H__
-
-#define INVAL_SUBVERSION_NUMBER (0xff)
-#define CRYPT_INVAL_OP (GF_FOP_NULL)
-
-#define CRYPTO_FORMAT_PREFIX "trusted.glusterfs.crypt.att.cfmt"
-#define FSIZE_XATTR_PREFIX "trusted.glusterfs.crypt.att.size"
-#define SUBREQ_PREFIX "trusted.glusterfs.crypt.msg.sreq"
-#define FSIZE_MSG_PREFIX "trusted.glusterfs.crypt.msg.size"
-#define DE_MSG_PREFIX "trusted.glusterfs.crypt.msg.dent"
-#define REQUEST_ID_PREFIX "trusted.glusterfs.crypt.msg.rqid"
-#define MSGFLAGS_PREFIX "trusted.glusterfs.crypt.msg.xfgs"
-
-/* messages for crypt_open() */
-#define MSGFLAGS_REQUEST_MTD_RLOCK 1 /* take read lock and don't unlock */
-#define MSGFLAGS_REQUEST_MTD_WLOCK 2 /* take write lock and don't unlock */
-
-#define AES_BLOCK_BITS (4) /* AES_BLOCK_SIZE == 1 << AES_BLOCK_BITS */
-
-#define noop \
- do { \
- ; \
- } while (0)
-#define cassert(cond) \
- ({ \
- switch (-1) { \
- case (cond): \
- case 0: \
- break; \
- } \
- })
-#define __round_mask(x, y) ((__typeof__(x))((y)-1))
-#define round_up(x, y) ((((x)-1) | __round_mask(x, y)) + 1)
-
-/*
- * Format of file's metadata
- */
-struct crypt_format {
- uint8_t loader_id; /* version of metadata loader */
- uint8_t versioned[0]; /* file's metadata of specific version */
-} __attribute__((packed));
-
-typedef enum { AES_CIPHER_ALG, LAST_CIPHER_ALG } cipher_alg_t;
-
-typedef enum { XTS_CIPHER_MODE, LAST_CIPHER_MODE } cipher_mode_t;
-
-typedef enum { MTD_LOADER_V1, LAST_MTD_LOADER } mtd_loader_id;
-
-static inline void
-msgflags_set_mtd_rlock(uint32_t *flags)
-{
- *flags |= MSGFLAGS_REQUEST_MTD_RLOCK;
-}
-
-static inline void
-msgflags_set_mtd_wlock(uint32_t *flags)
-{
- *flags |= MSGFLAGS_REQUEST_MTD_WLOCK;
-}
-
-static inline gf_boolean_t
-msgflags_check_mtd_rlock(uint32_t *flags)
-{
- return *flags & MSGFLAGS_REQUEST_MTD_RLOCK;
-}
-
-static inline gf_boolean_t
-msgflags_check_mtd_wlock(uint32_t *flags)
-{
- return *flags & MSGFLAGS_REQUEST_MTD_WLOCK;
-}
-
-static inline gf_boolean_t
-msgflags_check_mtd_lock(uint32_t *flags)
-{
- return msgflags_check_mtd_rlock(flags) || msgflags_check_mtd_wlock(flags);
-}
-
-/*
- * returns number of logical blocks occupied
- * (maybe partially) by @count bytes
- * at offset @start.
- */
-static inline off_t
-logical_blocks_occupied(uint64_t start, off_t count, int blkbits)
-{
- return ((start + count - 1) >> blkbits) - (start >> blkbits) + 1;
-}
-
-/*
- * are two bytes (represented by offsets @off1
- * and @off2 respectively) in the same logical
- * block.
- */
-static inline int
-in_same_lblock(uint64_t off1, uint64_t off2, int blkbits)
-{
- return off1 >> blkbits == off2 >> blkbits;
-}
-
-static inline void
-dump_cblock(xlator_t *this, unsigned char *buf)
-{
- gf_log(this->name, GF_LOG_DEBUG,
- "dump cblock: %x %x %x %x %x %x %x %x %x %x %x %x %x %x %x %x",
- (buf)[0], (buf)[1], (buf)[2], (buf)[3], (buf)[4], (buf)[5], (buf)[6],
- (buf)[7], (buf)[8], (buf)[9], (buf)[10], (buf)[11], (buf)[12],
- (buf)[13], (buf)[14], (buf)[15]);
-}
-
-#endif /* __CRYPT_COMMON_H__ */
-
-/*
- Local variables:
- c-indentation-style: "K&R"
- mode-name: "LC"
- c-basic-offset: 8
- tab-width: 8
- fill-column: 80
- scroll-step: 1
- End:
-*/
diff --git a/xlators/encryption/crypt/src/crypt-mem-types.h b/xlators/encryption/crypt/src/crypt-mem-types.h
deleted file mode 100644
index 7e9fb90ed43..00000000000
--- a/xlators/encryption/crypt/src/crypt-mem-types.h
+++ /dev/null
@@ -1,41 +0,0 @@
-/*
- Copyright (c) 2008-2013 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-#ifndef __CRYPT_MEM_TYPES_H__
-#define __CRYPT_MEM_TYPES_H__
-
-#include "mem-types.h"
-
-enum gf_crypt_mem_types_ {
- gf_crypt_mt_priv = gf_common_mt_end + 1,
- gf_crypt_mt_inode,
- gf_crypt_mt_data,
- gf_crypt_mt_mtd,
- gf_crypt_mt_loc,
- gf_crypt_mt_iatt,
- gf_crypt_mt_key,
- gf_crypt_mt_iovec,
- gf_crypt_mt_char,
- gf_crypt_mt_local,
- gf_crypt_mt_end,
-};
-
-#endif /* __CRYPT_MEM_TYPES_H__ */
-
-/*
- Local variables:
- c-indentation-style: "K&R"
- mode-name: "LC"
- c-basic-offset: 8
- tab-width: 8
- fill-column: 80
- scroll-step: 1
- End:
-*/
diff --git a/xlators/encryption/crypt/src/crypt.c b/xlators/encryption/crypt/src/crypt.c
deleted file mode 100644
index 02c4028c087..00000000000
--- a/xlators/encryption/crypt/src/crypt.c
+++ /dev/null
@@ -1,3906 +0,0 @@
-/*
- Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-#include <ctype.h>
-#include <sys/uio.h>
-
-#include "glusterfs.h"
-#include "xlator.h"
-#include "logging.h"
-#include "defaults.h"
-
-#include "crypt-common.h"
-#include "crypt.h"
-
-static void
-init_inode_info_head(struct crypt_inode_info *info, fd_t *fd);
-static int32_t
-init_inode_info_tail(struct crypt_inode_info *info,
- struct master_cipher_info *master);
-static int32_t
-prepare_for_submit_hole(call_frame_t *frame, xlator_t *this, uint64_t from,
- off_t size);
-static int32_t
-load_file_size(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata);
-static void
-do_ordered_submit(call_frame_t *frame, xlator_t *this, atom_data_type dtype);
-static void
-do_parallel_submit(call_frame_t *frame, xlator_t *this, atom_data_type dtype);
-static void
-put_one_call_open(call_frame_t *frame);
-static void
-put_one_call_readv(call_frame_t *frame, xlator_t *this);
-static void
-put_one_call_writev(call_frame_t *frame, xlator_t *this);
-static void
-put_one_call_ftruncate(call_frame_t *frame, xlator_t *this);
-static void
-free_avec(struct iovec *avec, char **pool, int blocks_in_pool);
-static void
-free_avec_data(crypt_local_t *local);
-static void
-free_avec_hole(crypt_local_t *local);
-
-static crypt_local_t *
-crypt_alloc_local(call_frame_t *frame, xlator_t *this, glusterfs_fop_t fop)
-{
- crypt_local_t *local = NULL;
-
- local = GF_CALLOC(1, sizeof(crypt_local_t), gf_crypt_mt_local);
- if (!local) {
- gf_log(this->name, GF_LOG_ERROR, "out of memory");
- return NULL;
- }
- local->fop = fop;
- LOCK_INIT(&local->hole_lock);
- LOCK_INIT(&local->call_lock);
- LOCK_INIT(&local->rw_count_lock);
-
- frame->local = local;
- return local;
-}
-
-struct crypt_inode_info *
-get_crypt_inode_info(inode_t *inode, xlator_t *this)
-{
- int ret;
- uint64_t value = 0;
- struct crypt_inode_info *info;
-
- ret = inode_ctx_get(inode, this, &value);
- if (ret == -1) {
- gf_log(this->name, GF_LOG_WARNING, "Can not get inode info");
- return NULL;
- }
- info = (struct crypt_inode_info *)(long)value;
- if (info == NULL) {
- gf_log(this->name, GF_LOG_WARNING, "Can not obtain inode info");
- return NULL;
- }
- return info;
-}
-
-static struct crypt_inode_info *
-local_get_inode_info(crypt_local_t *local, xlator_t *this)
-{
- if (local->info)
- return local->info;
- local->info = get_crypt_inode_info(local->fd->inode, this);
- return local->info;
-}
-
-static struct crypt_inode_info *
-alloc_inode_info(crypt_local_t *local, loc_t *loc)
-{
- struct crypt_inode_info *info;
-
- info = GF_CALLOC(1, sizeof(struct crypt_inode_info), gf_crypt_mt_inode);
- if (!info) {
- local->op_ret = -1;
- local->op_errno = ENOMEM;
- gf_log("crypt", GF_LOG_WARNING, "Can not allocate inode info");
- return NULL;
- }
-#if DEBUG_CRYPT
- info->loc = GF_CALLOC(1, sizeof(loc_t), gf_crypt_mt_loc);
- if (!info->loc) {
- gf_log("crypt", GF_LOG_WARNING, "Can not allocate loc");
- GF_FREE(info);
- return NULL;
- }
- if (loc_copy(info->loc, loc)) {
- GF_FREE(info->loc);
- GF_FREE(info);
- return NULL;
- }
-#endif /* DEBUG_CRYPT */
-
- local->info = info;
- return info;
-}
-
-static void
-free_inode_info(struct crypt_inode_info *info)
-{
-#if DEBUG_CRYPT
- loc_wipe(info->loc);
- GF_FREE(info->loc);
-#endif
- memset(info, 0, sizeof(*info));
- GF_FREE(info);
-}
-
-int
-crypt_forget(xlator_t *this, inode_t *inode)
-{
- uint64_t ctx_addr = 0;
- if (!inode_ctx_del(inode, this, &ctx_addr))
- free_inode_info((struct crypt_inode_info *)(long)ctx_addr);
- return 0;
-}
-
-#if DEBUG_CRYPT
-static void
-check_read(call_frame_t *frame, xlator_t *this, int32_t read, struct iovec *vec,
- int32_t count, struct iatt *stbuf)
-{
- crypt_local_t *local = frame->local;
- struct object_cipher_info *object = get_object_cinfo(local->info);
- struct avec_config *conf = &local->data_conf;
- uint32_t resid = stbuf->ia_size & (object_alg_blksize(object) - 1);
-
- if (read <= 0)
- return;
- if (read != iov_length(vec, count))
- gf_log("crypt", GF_LOG_DEBUG,
- "op_ret differs from amount of read bytes");
-
- if (object_alg_should_pad(object) &&
- (read & (object_alg_blksize(object) - 1)))
- gf_log("crypt", GF_LOG_DEBUG,
- "bad amount of read bytes (!= 0 mod(cblock size))");
-
- if (conf->aligned_offset + read >
- stbuf->ia_size + (resid ? object_alg_blksize(object) - resid : 0))
- gf_log("crypt", GF_LOG_DEBUG, "bad amount of read bytes (too large))");
-}
-
-#define PT_BYTES_TO_DUMP (32)
-static void
-dump_plain_text(crypt_local_t *local, struct iovec *avec)
-{
- int32_t to_dump;
- char str[PT_BYTES_TO_DUMP + 1];
-
- if (!avec)
- return;
- to_dump = avec->iov_len;
- if (to_dump > PT_BYTES_TO_DUMP)
- to_dump = PT_BYTES_TO_DUMP;
- memcpy(str, avec->iov_base, to_dump);
- memset(str + to_dump, '0', 1);
- gf_log("crypt", GF_LOG_DEBUG, "Read file: %s", str);
-}
-
-static int32_t
-data_conf_invariant(struct avec_config *conf)
-{
- return conf->acount == !!has_head_block(conf) + !!has_tail_block(conf) +
- conf->nr_full_blocks;
-}
-
-static int32_t
-hole_conf_invariant(struct avec_config *conf)
-{
- return conf->blocks_in_pool == !!has_head_block(conf) +
- !!has_tail_block(conf) +
- !!has_full_blocks(conf);
-}
-
-static void
-crypt_check_conf(struct avec_config *conf)
-{
- int32_t ret = 0;
- const char *msg;
-
- switch (conf->type) {
- case DATA_ATOM:
- msg = "data";
- ret = data_conf_invariant(conf);
- break;
- case HOLE_ATOM:
- msg = "hole";
- ret = hole_conf_invariant(conf);
- break;
- default:
- msg = "unknown";
- }
- if (!ret)
- gf_log("crypt", GF_LOG_DEBUG, "bad %s conf", msg);
-}
-
-static void
-check_buf(call_frame_t *frame, xlator_t *this, struct iatt *buf)
-{
- crypt_local_t *local = frame->local;
- struct object_cipher_info *object = &local->info->cinfo;
- uint64_t local_file_size;
-
- switch (local->fop) {
- case GF_FOP_FTRUNCATE:
- return;
- case GF_FOP_WRITE:
- local_file_size = local->new_file_size;
- break;
- case GF_FOP_READ:
- if (parent_is_crypt_xlator(frame, this))
- return;
- local_file_size = local->cur_file_size;
- break;
- default:
- gf_log("crypt", GF_LOG_DEBUG, "bad file operation");
- return;
- }
- if (buf->ia_size != round_up(local_file_size, object_alg_blksize(object)))
- gf_log("crypt", GF_LOG_DEBUG,
- "bad ia_size in buf (%llu), should be %llu",
- (unsigned long long)buf->ia_size,
- (unsigned long long)round_up(local_file_size,
- object_alg_blksize(object)));
-}
-
-#else
-#define check_read(frame, this, op_ret, vec, count, stbuf) noop
-#define dump_plain_text(local, avec) noop
-#define crypt_check_conf(conf) noop
-#define check_buf(frame, this, buf) noop
-#endif /* DEBUG_CRYPT */
-
-/*
- * Pre-conditions:
- * @vec represents a ciphertext of expanded size and
- * aligned offset.
- *
- * Compound a temporal vector @avec with block-aligned
- * components, decrypt and fix it up to represent a chunk
- * of data corresponding to the original size and offset.
- * Pass the result to the next translator.
- */
-int32_t
-crypt_readv_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iovec *vec,
- int32_t count, struct iatt *stbuf, struct iobref *iobref,
- dict_t *xdata)
-{
- crypt_local_t *local = frame->local;
- struct avec_config *conf = &local->data_conf;
- struct object_cipher_info *object = &local->info->cinfo;
-
- struct iovec *avec;
- uint32_t i;
- uint32_t to_vec;
- uint32_t to_user;
-
- check_buf(frame, this, stbuf);
- check_read(frame, this, op_ret, vec, count, stbuf);
-
- local->op_ret = op_ret;
- local->op_errno = op_errno;
- local->iobref = iobref_ref(iobref);
-
- local->buf = *stbuf;
- local->buf.ia_size = local->cur_file_size;
-
- if (op_ret <= 0 || count == 0 || vec[0].iov_len == 0)
- goto put_one_call;
-
- if (conf->orig_offset >= local->cur_file_size) {
- local->op_ret = 0;
- goto put_one_call;
- }
- /*
- * correct config params with real file size
- * and actual amount of bytes read
- */
- set_config_offsets(frame, this, conf->orig_offset, op_ret, DATA_ATOM, 0);
-
- if (conf->orig_offset + conf->orig_size > local->cur_file_size)
- conf->orig_size = local->cur_file_size - conf->orig_offset;
- /*
- * calculate amount of data to be returned
- * to user.
- */
- to_user = op_ret;
- if (conf->aligned_offset + to_user <= conf->orig_offset) {
- gf_log(this->name, GF_LOG_WARNING, "Incomplete read");
- local->op_ret = -1;
- local->op_errno = EIO;
- goto put_one_call;
- }
- to_user -= (conf->aligned_offset - conf->orig_offset);
-
- if (to_user > conf->orig_size)
- to_user = conf->orig_size;
- local->rw_count = to_user;
-
- op_errno = set_config_avec_data(this, local, conf, object, vec, count);
- if (op_errno) {
- local->op_ret = -1;
- local->op_errno = op_errno;
- goto put_one_call;
- }
- avec = conf->avec;
-#if DEBUG_CRYPT
- if (conf->off_in_tail != 0 &&
- conf->off_in_tail < object_alg_blksize(object) &&
- object_alg_should_pad(object))
- gf_log(this->name, GF_LOG_DEBUG, "Bad offset in tail %d",
- conf->off_in_tail);
- if (iov_length(vec, count) != 0 &&
- in_same_lblock(conf->orig_offset + iov_length(vec, count) - 1,
- local->cur_file_size - 1, object_alg_blkbits(object))) {
- gf_log(this->name, GF_LOG_DEBUG, "Compound last cblock");
- dump_cblock(this, (unsigned char *)(avec[conf->acount - 1].iov_base) +
- avec[conf->acount - 1].iov_len -
- object_alg_blksize(object));
- dump_cblock(this, (unsigned char *)(vec[count - 1].iov_base) +
- vec[count - 1].iov_len -
- object_alg_blksize(object));
- }
-#endif
- decrypt_aligned_iov(object, avec, conf->acount, conf->aligned_offset);
- /*
- * pass proper plain data to user
- */
- avec[0].iov_base += (conf->aligned_offset - conf->orig_offset);
- avec[0].iov_len -= (conf->aligned_offset - conf->orig_offset);
-
- to_vec = to_user;
- for (i = 0; i < conf->acount; i++) {
- if (avec[i].iov_len > to_vec)
- avec[i].iov_len = to_vec;
- to_vec -= avec[i].iov_len;
- }
-put_one_call:
- put_one_call_readv(frame, this);
- return 0;
-}
-
-static int32_t
-do_readv(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
- int32_t op_errno, dict_t *dict, dict_t *xdata)
-{
- data_t *data;
- crypt_local_t *local = frame->local;
-
- if (op_ret < 0)
- goto error;
- /*
- * extract regular file size
- */
- data = dict_get(dict, FSIZE_XATTR_PREFIX);
- if (!data) {
- gf_log("crypt", GF_LOG_WARNING, "Regular file size not found");
- op_errno = EIO;
- goto error;
- }
- local->cur_file_size = data_to_uint64(data);
-
- get_one_call(frame);
- STACK_WIND(frame, crypt_readv_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->readv, local->fd,
- /*
- * FIXME: read amount can be reduced
- */
- local->data_conf.expanded_size, local->data_conf.aligned_offset,
- local->flags, local->xdata);
- return 0;
-error:
- local->op_ret = -1;
- local->op_errno = op_errno;
-
- get_one_call(frame);
- put_one_call_readv(frame, this);
- return 0;
-}
-
-static int32_t
-crypt_readv_finodelk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
-{
- crypt_local_t *local = frame->local;
-
- if (op_ret < 0)
- goto error;
- /*
- * An access has been granted,
- * retrieve file size
- */
- STACK_WIND(frame, do_readv, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fgetxattr, local->fd,
- FSIZE_XATTR_PREFIX, NULL);
- return 0;
-error:
- fd_unref(local->fd);
- if (local->xdata)
- dict_unref(local->xdata);
- CRYPT_STACK_UNWIND(readv, frame, -1, op_errno, NULL, 0, NULL, NULL, NULL);
- return 0;
-}
-
-static int32_t
-readv_trivial_completion(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *buf,
- dict_t *xdata)
-{
- crypt_local_t *local = frame->local;
-
- local->op_ret = op_ret;
- local->op_errno = op_errno;
-
- if (op_ret < 0) {
- gf_log(this->name, GF_LOG_WARNING, "stat failed (%d)", op_errno);
- goto error;
- }
- local->buf = *buf;
- STACK_WIND(frame, load_file_size, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->getxattr, local->loc,
- FSIZE_XATTR_PREFIX, NULL);
- return 0;
-error:
- CRYPT_STACK_UNWIND(readv, frame, op_ret, op_errno, NULL, 0, NULL, NULL,
- NULL);
- return 0;
-}
-
-int32_t
-crypt_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
- off_t offset, uint32_t flags, dict_t *xdata)
-{
- int32_t ret;
- crypt_local_t *local;
- struct crypt_inode_info *info;
- struct gf_flock lock = {
- 0,
- };
-
-#if DEBUG_CRYPT
- gf_log("crypt", GF_LOG_DEBUG, "reading %d bytes from offset %llu",
- (int)size, (long long)offset);
- if (parent_is_crypt_xlator(frame, this))
- gf_log("crypt", GF_LOG_DEBUG, "parent is crypt");
-#endif
- local = crypt_alloc_local(frame, this, GF_FOP_READ);
- if (!local) {
- ret = ENOMEM;
- goto error;
- }
- if (size == 0)
- goto trivial;
-
- local->fd = fd_ref(fd);
- local->flags = flags;
-
- info = local_get_inode_info(local, this);
- if (info == NULL) {
- ret = EINVAL;
- fd_unref(fd);
- goto error;
- }
- if (!object_alg_atomic(&info->cinfo)) {
- ret = EINVAL;
- fd_unref(fd);
- goto error;
- }
- set_config_offsets(frame, this, offset, size, DATA_ATOM, 0);
- if (parent_is_crypt_xlator(frame, this)) {
- data_t *data;
- /*
- * We are called by crypt_writev (or cypt_ftruncate)
- * to perform the "read" component of the read-modify-write
- * (or read-prune-write) sequence for some atom;
- *
- * don't ask for access:
- * it has already been acquired
- *
- * Retrieve current file size
- */
- if (!xdata) {
- gf_log("crypt", GF_LOG_WARNING,
- "Regular file size hasn't been passed");
- ret = EIO;
- goto error;
- }
- data = dict_get(xdata, FSIZE_XATTR_PREFIX);
- if (!data) {
- gf_log("crypt", GF_LOG_WARNING, "Regular file size not found");
- ret = EIO;
- goto error;
- }
- local->old_file_size = local->cur_file_size = data_to_uint64(data);
-
- get_one_call(frame);
- STACK_WIND(frame, crypt_readv_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->readv, local->fd,
- /*
- * FIXME: read amount can be reduced
- */
- local->data_conf.expanded_size,
- local->data_conf.aligned_offset, flags, NULL);
- return 0;
- }
- if (xdata)
- local->xdata = dict_ref(xdata);
-
- lock.l_len = 0;
- lock.l_start = 0;
- lock.l_type = F_RDLCK;
- lock.l_whence = SEEK_SET;
-
- STACK_WIND(frame, crypt_readv_finodelk_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->finodelk, this->name, fd, F_SETLKW,
- &lock, NULL);
- return 0;
-trivial:
- STACK_WIND(frame, readv_trivial_completion, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fstat, fd, NULL);
- return 0;
-error:
- CRYPT_STACK_UNWIND(readv, frame, -1, ret, NULL, 0, NULL, NULL, NULL);
- return 0;
-}
-
-void
-set_local_io_params_writev(call_frame_t *frame,
- struct object_cipher_info *object,
- struct rmw_atom *atom, off_t io_offset,
- uint32_t io_size)
-{
- crypt_local_t *local = frame->local;
-
- local->io_offset = io_offset;
- local->io_size = io_size;
-
- local->io_offset_nopad = atom->offset_at(frame, object) +
- atom->offset_in(frame, object);
-
- gf_log("crypt", GF_LOG_DEBUG, "set nopad offset to %llu",
- (unsigned long long)local->io_offset_nopad);
-
- local->io_size_nopad = atom->io_size_nopad(frame, object);
-
- gf_log("crypt", GF_LOG_DEBUG, "set nopad size to %llu",
- (unsigned long long)local->io_size_nopad);
-
- local->update_disk_file_size = 0;
- /*
- * NOTE: eof_padding_size is 0 for all full atoms;
- * For head and tail atoms it will be set up at rmw_partial block()
- */
- local->new_file_size = local->cur_file_size;
-
- if (local->io_offset_nopad + local->io_size_nopad > local->cur_file_size) {
- local->new_file_size = local->io_offset_nopad + local->io_size_nopad;
-
- gf_log("crypt", GF_LOG_DEBUG, "set new file size to %llu",
- (unsigned long long)local->new_file_size);
-
- local->update_disk_file_size = 1;
- }
-}
-
-void
-set_local_io_params_ftruncate(call_frame_t *frame,
- struct object_cipher_info *object)
-{
- uint32_t resid;
- crypt_local_t *local = frame->local;
- struct avec_config *conf = &local->data_conf;
-
- resid = conf->orig_offset & (object_alg_blksize(object) - 1);
- if (resid) {
- local->eof_padding_size = object_alg_blksize(object) - resid;
- local->new_file_size = conf->aligned_offset;
- local->update_disk_file_size = 0;
- /*
- * file size will be updated
- * in the ->writev() stack,
- * when submitting file tail
- */
- } else {
- local->eof_padding_size = 0;
- local->new_file_size = conf->orig_offset;
- local->update_disk_file_size = 1;
- /*
- * file size will be updated
- * in this ->ftruncate stack
- */
- }
-}
-
-static void
-submit_head(call_frame_t *frame, xlator_t *this)
-{
- crypt_local_t *local = frame->local;
- submit_partial(frame, this, local->fd, HEAD_ATOM);
-}
-
-static void
-submit_tail(call_frame_t *frame, xlator_t *this)
-{
- crypt_local_t *local = frame->local;
- submit_partial(frame, this, local->fd, TAIL_ATOM);
-}
-
-static void
-submit_hole(call_frame_t *frame, xlator_t *this)
-{
- /*
- * hole conversion always means
- * appended write and goes in ordered fashion
- */
- do_ordered_submit(frame, this, HOLE_ATOM);
-}
-
-static void
-submit_data(call_frame_t *frame, xlator_t *this)
-{
- if (is_ordered_mode(frame)) {
- do_ordered_submit(frame, this, DATA_ATOM);
- return;
- }
- gf_log("crypt", GF_LOG_WARNING, "Bad submit mode");
- get_nr_calls(frame, nr_calls_data(frame));
- do_parallel_submit(frame, this, DATA_ATOM);
- return;
-}
-
-/*
- * heplers called by writev_cbk, fruncate_cbk in ordered mode
- */
-
-static int32_t
-should_submit_hole(crypt_local_t *local)
-{
- struct avec_config *conf = &local->hole_conf;
-
- return conf->avec != NULL;
-}
-
-static int32_t
-should_resume_submit_hole(crypt_local_t *local)
-{
- struct avec_config *conf = &local->hole_conf;
-
- if (local->fop == GF_FOP_WRITE && has_tail_block(conf))
- /*
- * Don't submit a part of hole, which
- * fits into a data block:
- * this part of hole will be converted
- * as a gap filled by zeros in data head
- * block.
- */
- return conf->cursor < conf->acount - 1;
- else
- return conf->cursor < conf->acount;
-}
-
-static int32_t
-should_resume_submit_data(call_frame_t *frame)
-{
- crypt_local_t *local = frame->local;
- struct avec_config *conf = &local->data_conf;
-
- if (is_ordered_mode(frame))
- return conf->cursor < conf->acount;
- /*
- * parallel writes
- */
- return 0;
-}
-
-static int32_t
-should_submit_data_after_hole(crypt_local_t *local)
-{
- return local->data_conf.avec != NULL;
-}
-
-static void
-update_local_file_params(call_frame_t *frame, xlator_t *this,
- struct iatt *prebuf, struct iatt *postbuf)
-{
- crypt_local_t *local = frame->local;
-
- check_buf(frame, this, postbuf);
-
- local->prebuf = *prebuf;
- local->postbuf = *postbuf;
-
- local->prebuf.ia_size = local->cur_file_size;
- local->postbuf.ia_size = local->new_file_size;
-
- local->cur_file_size = local->new_file_size;
-}
-
-static int32_t
-end_writeback_writev(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf, dict_t *xdata)
-{
- crypt_local_t *local = frame->local;
-
- local->op_ret = op_ret;
- local->op_errno = op_errno;
-
- if (op_ret <= 0) {
- gf_log(this->name, GF_LOG_WARNING, "writev iteration failed");
- goto put_one_call;
- }
- /*
- * op_ret includes paddings (atom's head, atom's tail and EOF)
- */
- if (op_ret < local->io_size) {
- gf_log(this->name, GF_LOG_WARNING, "Incomplete writev iteration");
- goto put_one_call;
- }
- op_ret -= local->eof_padding_size;
- local->op_ret = op_ret;
-
- update_local_file_params(frame, this, prebuf, postbuf);
-
- if (data_write_in_progress(local)) {
- LOCK(&local->rw_count_lock);
- local->rw_count += op_ret;
- UNLOCK(&local->rw_count_lock);
-
- if (should_resume_submit_data(frame))
- submit_data(frame, this);
- } else {
- /*
- * hole conversion is going on;
- * don't take into account written zeros
- */
- if (should_resume_submit_hole(local))
- submit_hole(frame, this);
-
- else if (should_submit_data_after_hole(local))
- submit_data(frame, this);
- }
-put_one_call:
- put_one_call_writev(frame, this);
- return 0;
-}
-
-#define crypt_writev_cbk end_writeback_writev
-
-#define HOLE_WRITE_CHUNK_BITS 12
-#define HOLE_WRITE_CHUNK_SIZE (1 << HOLE_WRITE_CHUNK_BITS)
-
-/*
- * Convert hole of size @size at offset @off to
- * zeros and prepare respective iovecs for submit.
- * The hole lock should be held.
- *
- * Pre-conditions:
- * @local->file_size is set and valid.
- */
-int32_t
-prepare_for_submit_hole(call_frame_t *frame, xlator_t *this, uint64_t off,
- off_t size)
-{
- int32_t ret;
- crypt_local_t *local = frame->local;
- struct object_cipher_info *object = &local->info->cinfo;
-
- set_config_offsets(frame, this, off, size, HOLE_ATOM, 1);
-
- ret = set_config_avec_hole(this, local, &local->hole_conf, object,
- local->fop);
- crypt_check_conf(&local->hole_conf);
-
- return ret;
-}
-
-/*
- * prepare for submit @count bytes at offset @from
- */
-int32_t
-prepare_for_submit_data(call_frame_t *frame, xlator_t *this, off_t from,
- int32_t size, struct iovec *vec, int32_t vec_count,
- int32_t setup_gap)
-{
- uint32_t ret;
- crypt_local_t *local = frame->local;
- struct object_cipher_info *object = &local->info->cinfo;
-
- set_config_offsets(frame, this, from, size, DATA_ATOM, setup_gap);
-
- ret = set_config_avec_data(this, local, &local->data_conf, object, vec,
- vec_count);
- crypt_check_conf(&local->data_conf);
-
- return ret;
-}
-
-static void
-free_avec(struct iovec *avec, char **pool, int blocks_in_pool)
-{
- if (!avec)
- return;
- GF_FREE(pool);
- GF_FREE(avec);
-}
-
-static void
-free_avec_data(crypt_local_t *local)
-{
- return free_avec(local->data_conf.avec, local->data_conf.pool,
- local->data_conf.blocks_in_pool);
-}
-
-static void
-free_avec_hole(crypt_local_t *local)
-{
- return free_avec(local->hole_conf.avec, local->hole_conf.pool,
- local->hole_conf.blocks_in_pool);
-}
-
-static void
-do_parallel_submit(call_frame_t *frame, xlator_t *this, atom_data_type dtype)
-{
- crypt_local_t *local = frame->local;
- struct avec_config *conf;
-
- local->active_setup = dtype;
- conf = conf_by_type(frame, dtype);
-
- if (has_head_block(conf))
- submit_head(frame, this);
-
- if (has_full_blocks(conf))
- submit_full(frame, this);
-
- if (has_tail_block(conf))
- submit_tail(frame, this);
- return;
-}
-
-static void
-do_ordered_submit(call_frame_t *frame, xlator_t *this, atom_data_type dtype)
-{
- crypt_local_t *local = frame->local;
- struct avec_config *conf;
-
- local->active_setup = dtype;
- conf = conf_by_type(frame, dtype);
-
- if (should_submit_head_block(conf)) {
- get_one_call_nolock(frame);
- submit_head(frame, this);
- } else if (should_submit_full_block(conf)) {
- get_one_call_nolock(frame);
- submit_full(frame, this);
- } else if (should_submit_tail_block(conf)) {
- get_one_call_nolock(frame);
- submit_tail(frame, this);
- } else
- gf_log("crypt", GF_LOG_DEBUG,
- "nothing has been submitted in ordered mode");
- return;
-}
-
-static int32_t
-do_writev(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
- int32_t op_errno, dict_t *dict, dict_t *xdata)
-{
- data_t *data;
- crypt_local_t *local = frame->local;
- struct object_cipher_info *object = &local->info->cinfo;
- /*
- * extract regular file size
- */
- data = dict_get(dict, FSIZE_XATTR_PREFIX);
- if (!data) {
- gf_log("crypt", GF_LOG_WARNING, "Regular file size not found");
- op_ret = -1;
- op_errno = EIO;
- goto error;
- }
- local->old_file_size = local->cur_file_size = data_to_uint64(data);
-
- set_gap_at_end(frame, object, &local->data_conf, DATA_ATOM);
-
- if (local->cur_file_size < local->data_conf.orig_offset) {
- /*
- * Set up hole config
- */
- op_errno = prepare_for_submit_hole(
- frame, this, local->cur_file_size,
- local->data_conf.orig_offset - local->cur_file_size);
- if (op_errno) {
- local->op_ret = -1;
- local->op_errno = op_errno;
- goto error;
- }
- }
- if (should_submit_hole(local))
- submit_hole(frame, this);
- else
- submit_data(frame, this);
- return 0;
-error:
- get_one_call_nolock(frame);
- put_one_call_writev(frame, this);
- return 0;
-}
-
-static int32_t
-crypt_writev_finodelk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
-{
- crypt_local_t *local = frame->local;
-
- local->op_ret = op_ret;
- local->op_errno = op_errno;
-
- if (op_ret < 0)
- goto error;
- /*
- * An access has been granted,
- * retrieve file size first
- */
- STACK_WIND(frame, do_writev, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fgetxattr, local->fd,
- FSIZE_XATTR_PREFIX, NULL);
- return 0;
-error:
- get_one_call_nolock(frame);
- put_one_call_writev(frame, this);
- return 0;
-}
-
-static int32_t
-writev_trivial_completion(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *buf,
- dict_t *dict)
-{
- crypt_local_t *local = frame->local;
-
- local->op_ret = op_ret;
- local->op_errno = op_errno;
- local->prebuf = *buf;
- local->postbuf = *buf;
-
- local->prebuf.ia_size = local->cur_file_size;
- local->postbuf.ia_size = local->cur_file_size;
-
- get_one_call(frame);
- put_one_call_writev(frame, this);
- return 0;
-}
-
-int
-crypt_writev(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *vec,
- int32_t count, off_t offset, uint32_t flags, struct iobref *iobref,
- dict_t *xdata)
-{
- int32_t ret;
- crypt_local_t *local;
- struct crypt_inode_info *info;
- struct gf_flock lock = {
- 0,
- };
-#if DEBUG_CRYPT
- gf_log("crypt", GF_LOG_DEBUG, "writing %d bytes from offset %llu",
- (int)iov_length(vec, count), (long long)offset);
-#endif
- local = crypt_alloc_local(frame, this, GF_FOP_WRITE);
- if (!local) {
- ret = ENOMEM;
- goto error;
- }
- local->fd = fd_ref(fd);
-
- if (iobref)
- local->iobref = iobref_ref(iobref);
- /*
- * to update real file size on the server
- */
- local->xattr = dict_new();
- if (!local->xattr) {
- ret = ENOMEM;
- goto error;
- }
- local->flags = flags;
-
- info = local_get_inode_info(local, this);
- if (info == NULL) {
- ret = EINVAL;
- goto error;
- }
- if (!object_alg_atomic(&info->cinfo)) {
- ret = EINVAL;
- goto error;
- }
- if (iov_length(vec, count) == 0)
- goto trivial;
-
- ret = prepare_for_submit_data(frame, this, offset,
- iov_length(vec, count),
- vec, count, 0 /* don't setup gup
- in tail: we don't
- know file size yet */);
- if (ret) {
- ret = ENOMEM;
- goto error;
- }
-
- if (parent_is_crypt_xlator(frame, this)) {
- data_t *data;
- /*
- * we are called by shinking crypt_ftruncate(),
- * which doesn't perform hole conversion;
- *
- * don't ask for access:
- * it has already been acquired
- */
-
- /*
- * extract file size
- */
- if (!xdata) {
- gf_log("crypt", GF_LOG_WARNING,
- "Regular file size hasn't been passed");
- ret = EIO;
- goto error;
- }
- data = dict_get(xdata, FSIZE_XATTR_PREFIX);
- if (!data) {
- gf_log("crypt", GF_LOG_WARNING, "Regular file size not found");
- ret = EIO;
- goto error;
- }
- local->old_file_size = local->cur_file_size = data_to_uint64(data);
-
- submit_data(frame, this);
- return 0;
- }
- if (xdata)
- local->xdata = dict_ref(xdata);
- /*
- * lock the file and retrieve its size
- */
- lock.l_len = 0;
- lock.l_start = 0;
- lock.l_type = F_WRLCK;
- lock.l_whence = SEEK_SET;
-
- STACK_WIND(frame, crypt_writev_finodelk_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->finodelk, this->name, fd, F_SETLKW,
- &lock, NULL);
- return 0;
-trivial:
- STACK_WIND(frame, writev_trivial_completion, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fstat, fd, NULL);
- return 0;
-error:
- if (local && local->fd)
- fd_unref(fd);
- if (local && local->iobref)
- iobref_unref(iobref);
- if (local && local->xdata)
- dict_unref(xdata);
- if (local && local->xattr)
- dict_unref(local->xattr);
- if (local && local->info)
- free_inode_info(local->info);
-
- CRYPT_STACK_UNWIND(writev, frame, -1, ret, NULL, NULL, NULL);
- return 0;
-}
-
-int32_t
-prepare_for_prune(call_frame_t *frame, xlator_t *this, uint64_t offset)
-{
- set_config_offsets(frame, this,
- offset,
- 0, /* count */
- DATA_ATOM,
- 0 /* since we prune, there is no
- gap in tail to uptodate */);
- return 0;
-}
-
-/*
- * Finish the read-prune-modify sequence
- *
- * Can be invoked as
- * 1) ->ftruncate_cbk() for cblock-aligned, or trivial prune
- * 2) ->writev_cbk() for non-cblock-aligned prune
- */
-
-static int32_t
-prune_complete(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf, dict_t *xdata)
-{
- crypt_local_t *local = frame->local;
-
- local->op_ret = op_ret;
- local->op_errno = op_errno;
-
- update_local_file_params(frame, this, prebuf, postbuf);
-
- put_one_call_ftruncate(frame, this);
- return 0;
-}
-
-/*
- * This is called as ->ftruncate_cbk()
- *
- * Perform the "write" component of the
- * read-prune-write sequence.
- *
- * submuit the rest of the file
- */
-static int32_t
-prune_submit_file_tail(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf, dict_t *xdata)
-{
- crypt_local_t *local = frame->local;
- struct avec_config *conf = &local->data_conf;
- dict_t *dict;
-
- if (op_ret < 0)
- goto put_one_call;
-
- if (local->xdata) {
- dict_unref(local->xdata);
- local->xdata = NULL;
- }
- if (xdata)
- local->xdata = dict_ref(xdata);
-
- dict = dict_new();
- if (!dict) {
- op_errno = ENOMEM;
- goto error;
- }
-
- update_local_file_params(frame, this, prebuf, postbuf);
- local->new_file_size = conf->orig_offset;
-
- /*
- * The rest of the file is a partial block and, hence,
- * should be written via RMW sequence, so the crypt xlator
- * does STACK_WIND to itself.
- *
- * Pass current file size to crypt_writev()
- */
- op_errno = dict_set(dict, FSIZE_XATTR_PREFIX,
- data_from_uint64(local->cur_file_size));
- if (op_errno) {
- gf_log("crypt", GF_LOG_WARNING, "can not set key to update file size");
- dict_unref(dict);
- goto error;
- }
- gf_log("crypt", GF_LOG_DEBUG,
- "passing current file size (%llu) to crypt_writev",
- (unsigned long long)local->cur_file_size);
- /*
- * Padding will be filled with
- * zeros by rmw_partial_block()
- */
- STACK_WIND(frame, prune_complete, this,
- this->fops->writev, /* crypt_writev */
- local->fd, &local->vec, 1,
- conf->aligned_offset, /* offset to write from */
- 0, local->iobref, dict);
-
- dict_unref(dict);
- return 0;
-error:
- local->op_ret = -1;
- local->op_errno = op_errno;
-put_one_call:
- put_one_call_ftruncate(frame, this);
- return 0;
-}
-
-/*
- * This is called as a callback of ->writev() invoked in behalf
- * of ftruncate(): it can be
- * 1) ordered writes issued by hole conversion in the case of
- * expanded truncate, or
- * 2) an rmw partial data block issued by non-cblock-aligned
- * prune.
- */
-int32_t
-end_writeback_ftruncate(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf, dict_t *xdata)
-{
- crypt_local_t *local = frame->local;
- /*
- * if nothing has been written,
- * then it must be an error
- */
- local->op_ret = op_ret;
- local->op_errno = op_errno;
-
- if (op_ret < 0)
- goto put_one_call;
-
- update_local_file_params(frame, this, prebuf, postbuf);
-
- if (data_write_in_progress(local))
- /* case (2) */
- goto put_one_call;
- /* case (1) */
- if (should_resume_submit_hole(local))
- submit_hole(frame, this);
- /*
- * case of hole, when we shouldn't resume
- */
-put_one_call:
- put_one_call_ftruncate(frame, this);
- return 0;
-}
-
-/*
- * Perform prune and write components of the
- * read-prune-write sequence.
- *
- * Called as ->readv_cbk()
- *
- * Pre-conditions:
- * @vec contains the latest atom of the file
- * (plain text)
- */
-static int32_t
-prune_write(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
- int32_t op_errno, struct iovec *vec, int32_t count,
- struct iatt *stbuf, struct iobref *iobref, dict_t *xdata)
-{
- int32_t i;
- size_t to_copy;
- size_t copied = 0;
- crypt_local_t *local = frame->local;
- struct avec_config *conf = &local->data_conf;
-
- local->op_ret = op_ret;
- local->op_errno = op_errno;
- if (op_ret == -1)
- goto put_one_call;
-
- /*
- * At first, uptodate head block
- */
- if (iov_length(vec, count) < conf->off_in_head) {
- gf_log(this->name, GF_LOG_WARNING,
- "Failed to uptodate head block for prune");
- local->op_ret = -1;
- local->op_errno = EIO;
- goto put_one_call;
- }
- local->vec.iov_len = conf->off_in_head;
- local->vec.iov_base = GF_CALLOC(1, local->vec.iov_len, gf_crypt_mt_data);
-
- if (local->vec.iov_base == NULL) {
- gf_log(this->name, GF_LOG_WARNING,
- "Failed to calloc head block for prune");
- local->op_ret = -1;
- local->op_errno = ENOMEM;
- goto put_one_call;
- }
- for (i = 0; i < count; i++) {
- to_copy = vec[i].iov_len;
- if (to_copy > local->vec.iov_len - copied)
- to_copy = local->vec.iov_len - copied;
-
- memcpy((char *)local->vec.iov_base + copied, vec[i].iov_base, to_copy);
- copied += to_copy;
- if (copied == local->vec.iov_len)
- break;
- }
- /*
- * perform prune with aligned offset
- * (i.e. at this step we prune a bit
- * more then it is needed
- */
- STACK_WIND(frame, prune_submit_file_tail, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->ftruncate, local->fd,
- conf->aligned_offset, local->xdata);
- return 0;
-put_one_call:
- put_one_call_ftruncate(frame, this);
- return 0;
-}
-
-/*
- * Perform a read-prune-write sequence
- */
-int32_t
-read_prune_write(call_frame_t *frame, xlator_t *this)
-{
- int32_t ret = 0;
- dict_t *dict = NULL;
- crypt_local_t *local = frame->local;
- struct avec_config *conf = &local->data_conf;
- struct object_cipher_info *object = &local->info->cinfo;
-
- set_local_io_params_ftruncate(frame, object);
- get_one_call_nolock(frame);
-
- if ((conf->orig_offset & (object_alg_blksize(object) - 1)) == 0) {
- /*
- * cblock-aligned prune:
- * we don't need read and write components,
- * just cut file body
- */
- gf_log("crypt", GF_LOG_DEBUG, "prune without RMW (at offset %llu",
- (unsigned long long)conf->orig_offset);
-
- STACK_WIND(frame, prune_complete, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->ftruncate, local->fd,
- conf->orig_offset, local->xdata);
- return 0;
- }
- gf_log("crypt", GF_LOG_DEBUG, "prune with RMW (at offset %llu",
- (unsigned long long)conf->orig_offset);
- /*
- * We are about to perform the "read" component of the
- * read-prune-write sequence. It means that we need to
- * read encrypted data from disk and decrypt it.
- * So, the crypt translator does STACK_WIND to itself.
- *
- * Pass current file size to crypt_readv()
-
- */
- dict = dict_new();
- if (!dict) {
- gf_log("crypt", GF_LOG_WARNING, "Can not alloc dict");
- ret = ENOMEM;
- goto exit;
- }
- ret = dict_set(dict, FSIZE_XATTR_PREFIX,
- data_from_uint64(local->cur_file_size));
- if (ret) {
- gf_log("crypt", GF_LOG_WARNING, "Can not set dict");
- goto exit;
- }
- STACK_WIND(frame, prune_write, this, this->fops->readv, /* crypt_readv */
- local->fd, get_atom_size(object), /* bytes to read */
- conf->aligned_offset, /* offset to read from */
- 0, dict);
-exit:
- if (dict)
- dict_unref(dict);
- return ret;
-}
-
-/*
- * File prune is more complicated than expand.
- * First we need to read the latest atom to not lose info
- * needed for proper update. Also we need to make sure that
- * every component of read-prune-write sequence leaves data
- * consistent
- *
- * Non-cblock aligned prune is performed as read-prune-write
- * sequence:
- *
- * 1) read the latest atom;
- * 2) perform cblock-aligned prune
- * 3) issue a write request for the end-of-file
- */
-int32_t
-prune_file(call_frame_t *frame, xlator_t *this, uint64_t offset)
-{
- int32_t ret;
-
- ret = prepare_for_prune(frame, this, offset);
- if (ret)
- return ret;
- return read_prune_write(frame, this);
-}
-
-int32_t
-expand_file(call_frame_t *frame, xlator_t *this, uint64_t offset)
-{
- int32_t ret;
- crypt_local_t *local = frame->local;
-
- ret = prepare_for_submit_hole(frame, this, local->old_file_size,
- offset - local->old_file_size);
- if (ret)
- return ret;
- submit_hole(frame, this);
- return 0;
-}
-
-static int32_t
-ftruncate_trivial_completion(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *buf,
- dict_t *dict)
-{
- crypt_local_t *local = frame->local;
-
- local->op_ret = op_ret;
- local->op_errno = op_errno;
- local->prebuf = *buf;
- local->postbuf = *buf;
-
- local->prebuf.ia_size = local->cur_file_size;
- local->postbuf.ia_size = local->cur_file_size;
-
- get_one_call(frame);
- put_one_call_ftruncate(frame, this);
- return 0;
-}
-
-static int32_t
-do_ftruncate(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
- int32_t op_errno, dict_t *dict, dict_t *xdata)
-{
- data_t *data;
- crypt_local_t *local = frame->local;
-
- if (op_ret)
- goto error;
- /*
- * extract regular file size
- */
- data = dict_get(dict, FSIZE_XATTR_PREFIX);
- if (!data) {
- gf_log("crypt", GF_LOG_WARNING, "Regular file size not found");
- op_errno = EIO;
- goto error;
- }
- local->old_file_size = local->cur_file_size = data_to_uint64(data);
-
- if (local->data_conf.orig_offset == local->cur_file_size) {
-#if DEBUG_CRYPT
- gf_log("crypt", GF_LOG_DEBUG,
- "trivial ftruncate (current file size %llu)",
- (unsigned long long)local->cur_file_size);
-#endif
- goto trivial;
- } else if (local->data_conf.orig_offset < local->cur_file_size) {
-#if DEBUG_CRYPT
- gf_log("crypt", GF_LOG_DEBUG, "prune from %llu to %llu",
- (unsigned long long)local->cur_file_size,
- (unsigned long long)local->data_conf.orig_offset);
-#endif
- op_errno = prune_file(frame, this, local->data_conf.orig_offset);
- } else {
-#if DEBUG_CRYPT
- gf_log("crypt", GF_LOG_DEBUG, "expand from %llu to %llu",
- (unsigned long long)local->cur_file_size,
- (unsigned long long)local->data_conf.orig_offset);
-#endif
- op_errno = expand_file(frame, this, local->data_conf.orig_offset);
- }
- if (op_errno)
- goto error;
- return 0;
-trivial:
- STACK_WIND(frame, ftruncate_trivial_completion, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fstat, local->fd, NULL);
- return 0;
-error:
- /*
- * finish with ftruncate
- */
- local->op_ret = -1;
- local->op_errno = op_errno;
-
- get_one_call_nolock(frame);
- put_one_call_ftruncate(frame, this);
- return 0;
-}
-
-static int32_t
-crypt_ftruncate_finodelk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
-{
- crypt_local_t *local = frame->local;
-
- local->op_ret = op_ret;
- local->op_errno = op_errno;
-
- if (op_ret < 0)
- goto error;
- /*
- * An access has been granted,
- * retrieve file size first
- */
- STACK_WIND(frame, do_ftruncate, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fgetxattr, local->fd,
- FSIZE_XATTR_PREFIX, NULL);
- return 0;
-error:
- get_one_call_nolock(frame);
- put_one_call_ftruncate(frame, this);
- return 0;
-}
-
-/*
- * ftruncate is performed in 2 steps:
- * . receive file size;
- * . expand or prune file.
- */
-static int32_t
-crypt_ftruncate(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
- dict_t *xdata)
-{
- int32_t ret;
- crypt_local_t *local;
- struct crypt_inode_info *info;
- struct gf_flock lock = {
- 0,
- };
-
- local = crypt_alloc_local(frame, this, GF_FOP_FTRUNCATE);
- if (!local) {
- ret = ENOMEM;
- goto error;
- }
- local->xattr = dict_new();
- if (!local->xattr) {
- ret = ENOMEM;
- goto error;
- }
- local->fd = fd_ref(fd);
- info = local_get_inode_info(local, this);
- if (info == NULL) {
- ret = EINVAL;
- goto error;
- }
- if (!object_alg_atomic(&info->cinfo)) {
- ret = EINVAL;
- goto error;
- }
- local->data_conf.orig_offset = offset;
- if (xdata)
- local->xdata = dict_ref(xdata);
-
- lock.l_len = 0;
- lock.l_start = 0;
- lock.l_type = F_WRLCK;
- lock.l_whence = SEEK_SET;
-
- STACK_WIND(frame, crypt_ftruncate_finodelk_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->finodelk, this->name, fd, F_SETLKW,
- &lock, NULL);
- return 0;
-error:
- if (local && local->fd)
- fd_unref(fd);
- if (local && local->xdata)
- dict_unref(xdata);
- if (local && local->xattr)
- dict_unref(local->xattr);
- if (local && local->info)
- free_inode_info(local->info);
-
- CRYPT_STACK_UNWIND(ftruncate, frame, -1, ret, NULL, NULL, NULL);
- return 0;
-}
-
-/* ->flush_cbk() */
-int32_t
-truncate_end(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
- int32_t op_errno, dict_t *xdata)
-{
- crypt_local_t *local = frame->local;
-
- CRYPT_STACK_UNWIND(truncate, frame, op_ret, op_errno, &local->prebuf,
- &local->postbuf, local->xdata);
- return 0;
-}
-
-/* ftruncate_cbk() */
-int32_t
-truncate_flush(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf, dict_t *xdata)
-{
- crypt_local_t *local = frame->local;
- fd_t *fd = local->fd;
- local->prebuf = *prebuf;
- local->postbuf = *postbuf;
-
- STACK_WIND(frame, truncate_end, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->flush, fd, NULL);
- fd_unref(fd);
- return 0;
-}
-
-/*
- * is called as ->open_cbk()
- */
-static int32_t
-truncate_begin(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata)
-{
- crypt_local_t *local = frame->local;
-
- if (op_ret < 0) {
- fd_unref(fd);
- CRYPT_STACK_UNWIND(truncate, frame, op_ret, op_errno, NULL, NULL, NULL);
- return 0;
- } else {
- fd_bind(fd);
- }
- /*
- * crypt_truncate() is implemented via crypt_ftruncate(),
- * so the crypt xlator does STACK_WIND to itself here
- */
- STACK_WIND(frame, truncate_flush, this,
- this->fops->ftruncate, /* crypt_ftruncate */
- fd, local->offset, NULL);
- return 0;
-}
-
-/*
- * crypt_truncate() is implemented via crypt_ftruncate() as a
- * sequence crypt_open() - crypt_ftruncate() - truncate_flush()
- */
-int32_t
-crypt_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset,
- dict_t *xdata)
-{
- fd_t *fd;
- crypt_local_t *local;
-
-#if DEBUG_CRYPT
- gf_log(this->name, GF_LOG_DEBUG, "truncate file %s at offset %llu",
- loc->path, (unsigned long long)offset);
-#endif
- local = crypt_alloc_local(frame, this, GF_FOP_TRUNCATE);
- if (!local)
- goto error;
-
- fd = fd_create(loc->inode, frame->root->pid);
- if (!fd) {
- gf_log(this->name, GF_LOG_ERROR, "Can not create fd");
- goto error;
- }
- local->fd = fd;
- local->offset = offset;
- local->xdata = xdata;
- STACK_WIND(frame, truncate_begin, this, this->fops->open, /* crypt_open() */
- loc, O_RDWR, fd, NULL);
- return 0;
-error:
- CRYPT_STACK_UNWIND(truncate, frame, -1, EINVAL, NULL, NULL, NULL);
- return 0;
-}
-
-end_writeback_handler_t
-dispatch_end_writeback(glusterfs_fop_t fop)
-{
- switch (fop) {
- case GF_FOP_WRITE:
- return end_writeback_writev;
- case GF_FOP_FTRUNCATE:
- return end_writeback_ftruncate;
- default:
- gf_log("crypt", GF_LOG_WARNING, "Bad wb operation %d", fop);
- return NULL;
- }
-}
-
-/*
- * true, if the caller needs metadata string
- */
-static int32_t
-is_custom_mtd(dict_t *xdata)
-{
- data_t *data;
- uint32_t flags;
-
- if (!xdata)
- return 0;
-
- data = dict_get(xdata, MSGFLAGS_PREFIX);
- if (!data)
- return 0;
- if (data->len != sizeof(uint32_t)) {
- gf_log("crypt", GF_LOG_WARNING, "Bad msgflags size (%d)", data->len);
- return -1;
- }
- flags = *((uint32_t *)data->data);
- return msgflags_check_mtd_lock(&flags);
-}
-
-static int32_t
-crypt_open_done(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
-{
- crypt_local_t *local = frame->local;
-
- local->op_ret = op_ret;
- local->op_errno = op_errno;
- if (op_ret < 0)
- gf_log(this->name, GF_LOG_WARNING, "mtd unlock failed (%d)", op_errno);
- put_one_call_open(frame);
- return 0;
-}
-
-static void
-crypt_open_tail(call_frame_t *frame, xlator_t *this)
-{
- struct gf_flock lock = {
- 0,
- };
- crypt_local_t *local = frame->local;
-
- lock.l_type = F_UNLCK;
- lock.l_whence = SEEK_SET;
- lock.l_start = 0;
- lock.l_len = 0;
- lock.l_pid = 0;
-
- STACK_WIND(frame, crypt_open_done, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->finodelk, this->name, local->fd,
- F_SETLKW, &lock, NULL);
-}
-
-/*
- * load private inode info at open time
- * called as ->fgetxattr_cbk()
- */
-static int
-load_mtd_open(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
- int32_t op_errno, dict_t *dict, dict_t *xdata)
-{
- int32_t ret;
- gf_boolean_t upload_info;
- data_t *mtd;
- uint64_t value = 0;
- struct crypt_inode_info *info;
- crypt_local_t *local = frame->local;
- crypt_private_t *priv = this->private;
-
- local->op_ret = op_ret;
- local->op_errno = op_errno;
-
- if (local->fd->inode->ia_type == IA_IFLNK)
- goto exit;
- if (op_ret < 0)
- goto exit;
- /*
- * first, check for cached info
- */
- ret = inode_ctx_get(local->fd->inode, this, &value);
- if (ret != -1) {
- info = (struct crypt_inode_info *)(long)value;
- if (info == NULL) {
- gf_log(this->name, GF_LOG_WARNING,
- "Inode info expected, but not found");
- local->op_ret = -1;
- local->op_errno = EIO;
- goto exit;
- }
- /*
- * info has been found in the cache
- */
- upload_info = _gf_false;
- } else {
- /*
- * info hasn't been found in the cache.
- */
- info = alloc_inode_info(local, local->loc);
- if (!info) {
- local->op_ret = -1;
- local->op_errno = ENOMEM;
- goto exit;
- }
- init_inode_info_head(info, local->fd);
- upload_info = _gf_true;
- }
- /*
- * extract metadata
- */
- mtd = dict_get(dict, CRYPTO_FORMAT_PREFIX);
- if (!mtd) {
- local->op_ret = -1;
- local->op_errno = ENOENT;
- gf_log(this->name, GF_LOG_WARNING, "Format string wasn't found");
- goto exit;
- }
- /*
- * authenticate metadata against the path
- */
- ret = open_format((unsigned char *)mtd->data, mtd->len, local->loc, info,
- get_master_cinfo(priv), local, upload_info);
- if (ret) {
- local->op_ret = -1;
- local->op_errno = ret;
- goto exit;
- }
- if (upload_info) {
- ret = init_inode_info_tail(info, get_master_cinfo(priv));
- if (ret) {
- local->op_ret = -1;
- local->op_errno = ret;
- goto exit;
- }
- ret = inode_ctx_put(local->fd->inode, this, (uint64_t)(long)info);
- if (ret == -1) {
- local->op_ret = -1;
- local->op_errno = EIO;
- goto exit;
- }
- }
- if (local->custom_mtd) {
- /*
- * pass the metadata string to the customer
- */
- ret = dict_set_static_bin(local->xdata, CRYPTO_FORMAT_PREFIX, mtd->data,
- mtd->len);
- if (ret) {
- local->op_ret = -1;
- local->op_errno = ret;
- goto exit;
- }
- }
-exit:
- if (!local->custom_mtd)
- crypt_open_tail(frame, this);
- else
- put_one_call_open(frame);
- return 0;
-}
-
-static int32_t
-crypt_open_finodelk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
-{
- crypt_local_t *local = frame->local;
-
- local->op_ret = op_ret;
- local->op_errno = op_errno;
-
- if (op_ret < 0) {
- gf_log(this->name, GF_LOG_WARNING, "finodelk (LOCK) failed");
- goto exit;
- }
- STACK_WIND(frame, load_mtd_open, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fgetxattr, local->fd,
- CRYPTO_FORMAT_PREFIX, NULL);
- return 0;
-exit:
- put_one_call_open(frame);
- return 0;
-}
-
-/*
- * verify metadata against the specified pathname
- */
-static int32_t
-crypt_open_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata)
-{
- struct gf_flock lock = {
- 0,
- };
- crypt_local_t *local = frame->local;
-
- local->op_ret = op_ret;
- local->op_errno = op_errno;
-
- if (local->fd->inode->ia_type == IA_IFLNK)
- goto exit;
- if (op_ret < 0)
- goto exit;
- if (xdata)
- local->xdata = dict_ref(xdata);
- else if (local->custom_mtd) {
- local->xdata = dict_new();
- if (!local->xdata) {
- local->op_ret = -1;
- local->op_errno = ENOMEM;
- gf_log("crypt", GF_LOG_ERROR,
- "Can not get new dict for mtd string");
- goto exit;
- }
- }
- lock.l_len = 0;
- lock.l_start = 0;
- lock.l_type = local->custom_mtd ? F_WRLCK : F_RDLCK;
- lock.l_whence = SEEK_SET;
-
- STACK_WIND(frame, crypt_open_finodelk_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->finodelk, this->name, fd, F_SETLKW,
- &lock, NULL);
- return 0;
-exit:
- put_one_call_open(frame);
- return 0;
-}
-
-static int32_t
-crypt_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
- fd_t *fd, dict_t *xdata)
-{
- int32_t ret = ENOMEM;
- crypt_local_t *local;
-
- local = crypt_alloc_local(frame, this, GF_FOP_OPEN);
- if (!local)
- goto error;
- local->loc = GF_CALLOC(1, sizeof(loc_t), gf_crypt_mt_loc);
- if (!local->loc) {
- ret = ENOMEM;
- goto error;
- }
- ret = loc_copy(local->loc, loc);
- if (ret) {
- GF_FREE(local->loc);
- ret = ENOMEM;
- goto error;
- }
- local->fd = fd_ref(fd);
-
- ret = is_custom_mtd(xdata);
- if (ret < 0) {
- loc_wipe(local->loc);
- GF_FREE(local->loc);
- ret = EINVAL;
- goto error;
- }
- local->custom_mtd = ret;
-
- if ((flags & O_ACCMODE) == O_WRONLY)
- /*
- * we can't open O_WRONLY, because
- * we need to do read-modify-write
- */
- flags = (flags & ~O_ACCMODE) | O_RDWR;
- /*
- * Make sure that out translated offsets
- * and counts won't be ignored
- */
- flags &= ~O_APPEND;
- get_one_call_nolock(frame);
- STACK_WIND(frame, crypt_open_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->open, loc, flags, fd, xdata);
- return 0;
-error:
- CRYPT_STACK_UNWIND(open, frame, -1, ret, NULL, NULL);
- return 0;
-}
-
-static int32_t
-init_inode_info_tail(struct crypt_inode_info *info,
- struct master_cipher_info *master)
-{
- int32_t ret;
- struct object_cipher_info *object = &info->cinfo;
-
-#if DEBUG_CRYPT
- gf_log("crypt", GF_LOG_DEBUG, "Init inode info for object %s",
- uuid_utoa(info->oid));
-#endif
- ret = data_cipher_algs[object->o_alg][object->o_mode].set_private(info,
- master);
- if (ret) {
- gf_log("crypt", GF_LOG_ERROR, "Set private info failed");
- return ret;
- }
- return 0;
-}
-
-/*
- * Init inode info at ->create() time
- */
-static void
-init_inode_info_create(struct crypt_inode_info *info,
- struct master_cipher_info *master, data_t *data)
-{
- struct object_cipher_info *object;
-
- info->nr_minor = CRYPT_XLATOR_ID;
- memcpy(info->oid, data->data, data->len);
-
- object = &info->cinfo;
-
- object->o_alg = master->m_alg;
- object->o_mode = master->m_mode;
- object->o_block_bits = master->m_block_bits;
- object->o_dkey_size = master->m_dkey_size;
-}
-
-static void
-init_inode_info_head(struct crypt_inode_info *info, fd_t *fd)
-{
- memcpy(info->oid, fd->inode->gfid, sizeof(uuid_t));
-}
-
-static int32_t
-crypt_create_done(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
-{
- crypt_private_t *priv = this->private;
- crypt_local_t *local = frame->local;
- struct crypt_inode_info *info = local->info;
- fd_t *local_fd = local->fd;
- dict_t *local_xdata = local->xdata;
- inode_t *local_inode = local->inode;
-
- if (op_ret < 0) {
- free_inode_info(info);
- goto unwind;
- }
- op_errno = init_inode_info_tail(info, get_master_cinfo(priv));
- if (op_errno) {
- op_ret = -1;
- free_inode_info(info);
- goto unwind;
- }
- /*
- * FIXME: drop major subversion number
- */
- op_ret = inode_ctx_put(local->fd->inode, this, (uint64_t)(long)info);
- if (op_ret == -1) {
- op_errno = EIO;
- free_inode_info(info);
- goto unwind;
- }
-unwind:
- free_format(local);
- CRYPT_STACK_UNWIND(create, frame, op_ret, op_errno, local_fd, local_inode,
- &local->buf, &local->prebuf, &local->postbuf,
- local_xdata);
- fd_unref(local_fd);
- inode_unref(local_inode);
- if (local_xdata)
- dict_unref(local_xdata);
- return 0;
-}
-
-static int
-crypt_create_tail(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
-{
- struct gf_flock lock = {
- 0,
- };
- crypt_local_t *local = frame->local;
- fd_t *local_fd = local->fd;
- dict_t *local_xdata = local->xdata;
- inode_t *local_inode = local->inode;
-
- dict_unref(local->xattr);
-
- if (op_ret < 0)
- goto error;
-
- lock.l_type = F_UNLCK;
- lock.l_whence = SEEK_SET;
- lock.l_start = 0;
- lock.l_len = 0;
- lock.l_pid = 0;
-
- STACK_WIND(frame, crypt_create_done, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->finodelk, this->name, local->fd,
- F_SETLKW, &lock, NULL);
- return 0;
-error:
- free_inode_info(local->info);
- free_format(local);
-
- CRYPT_STACK_UNWIND(create, frame, op_ret, op_errno, local_fd, local_inode,
- &local->buf, &local->prebuf, &local->postbuf,
- local_xdata);
-
- fd_unref(local_fd);
- inode_unref(local_inode);
- if (local_xdata)
- dict_unref(local_xdata);
- return 0;
-}
-
-static int32_t
-crypt_create_finodelk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
-{
- crypt_local_t *local = frame->local;
- struct crypt_inode_info *info = local->info;
-
- if (op_ret < 0)
- goto error;
-
- STACK_WIND(frame, crypt_create_tail, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fsetxattr, local->fd,
- local->xattr, /* CRYPTO_FORMAT_PREFIX */
- 0, NULL);
- return 0;
-error:
- free_inode_info(info);
- free_format(local);
- fd_unref(local->fd);
- dict_unref(local->xattr);
- if (local->xdata)
- dict_unref(local->xdata);
-
- CRYPT_STACK_UNWIND(create, frame, op_ret, op_errno, NULL, NULL, NULL, NULL,
- NULL, NULL);
- return 0;
-}
-
-/*
- * Create and store crypt-specific format on disk;
- * Populate cache with private inode info
- */
-static int32_t
-crypt_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, fd_t *fd, inode_t *inode,
- struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
-{
- struct gf_flock lock = {
- 0,
- };
- crypt_local_t *local = frame->local;
- struct crypt_inode_info *info = local->info;
-
- if (op_ret < 0)
- goto error;
- if (xdata)
- local->xdata = dict_ref(xdata);
- local->inode = inode_ref(inode);
- local->buf = *buf;
- local->prebuf = *preparent;
- local->postbuf = *postparent;
-
- lock.l_len = 0;
- lock.l_start = 0;
- lock.l_type = F_WRLCK;
- lock.l_whence = SEEK_SET;
-
- STACK_WIND(frame, crypt_create_finodelk_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->finodelk, this->name, local->fd,
- F_SETLKW, &lock, NULL);
- return 0;
-error:
- free_inode_info(info);
- free_format(local);
- fd_unref(local->fd);
- dict_unref(local->xattr);
-
- CRYPT_STACK_UNWIND(create, frame, op_ret, op_errno, NULL, NULL, NULL, NULL,
- NULL, NULL);
- return 0;
-}
-
-static int32_t
-crypt_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
- mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata)
-{
- int ret;
- data_t *data;
- crypt_local_t *local;
- crypt_private_t *priv;
- struct master_cipher_info *master;
- struct crypt_inode_info *info;
-
- priv = this->private;
- master = get_master_cinfo(priv);
-
- if (master_alg_atomic(master)) {
- /*
- * We can't open O_WRONLY, because we
- * need to do read-modify-write.
- */
- if ((flags & O_ACCMODE) == O_WRONLY)
- flags = (flags & ~O_ACCMODE) | O_RDWR;
- /*
- * Make sure that out translated offsets
- * and counts won't be ignored
- */
- flags &= ~O_APPEND;
- }
- local = crypt_alloc_local(frame, this, GF_FOP_CREATE);
- if (!local) {
- ret = ENOMEM;
- goto error;
- }
- data = dict_get(xdata, "gfid-req");
- if (!data) {
- ret = EINVAL;
- gf_log("crypt", GF_LOG_WARNING, "gfid not found");
- goto error;
- }
- if (data->len != sizeof(uuid_t)) {
- ret = EINVAL;
- gf_log("crypt", GF_LOG_WARNING, "bad gfid size (%d), should be %d",
- (int)data->len, (int)sizeof(uuid_t));
- goto error;
- }
- info = alloc_inode_info(local, loc);
- if (!info) {
- ret = ENOMEM;
- goto error;
- }
- /*
- * NOTE:
- * format has to be created BEFORE
- * proceeding to the untrusted server
- */
- ret = alloc_format_create(local);
- if (ret) {
- free_inode_info(info);
- goto error;
- }
- init_inode_info_create(info, master, data);
-
- ret = create_format(local->format, loc, info, master);
- if (ret) {
- free_inode_info(info);
- goto error;
- }
- local->xattr = dict_new();
- if (!local->xattr) {
- free_inode_info(info);
- free_format(local);
- goto error;
- }
- ret = dict_set_static_bin(local->xattr, CRYPTO_FORMAT_PREFIX, local->format,
- new_format_size());
- if (ret) {
- dict_unref(local->xattr);
- free_inode_info(info);
- free_format(local);
- ret = EINVAL;
- goto error;
- }
- ret = dict_set(local->xattr, FSIZE_XATTR_PREFIX, data_from_uint64(0));
- if (ret) {
- dict_unref(local->xattr);
- free_inode_info(info);
- free_format(local);
- ret = ENOMEM;
- goto error;
- }
- local->fd = fd_ref(fd);
-
- STACK_WIND(frame, crypt_create_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->create, loc, flags, mode, umask, fd,
- xdata);
- return 0;
-error:
- gf_log("crypt", GF_LOG_WARNING, "can not create file");
- CRYPT_STACK_UNWIND(create, frame, -1, ret, NULL, NULL, NULL, NULL, NULL,
- NULL);
- return 0;
-}
-
-/*
- * FIXME: this should depends on the version of format string
- */
-static int32_t
-filter_crypt_xattr(dict_t *dict, char *key, data_t *value, void *data)
-{
- dict_del(dict, key);
- return 0;
-}
-
-static int32_t
-crypt_fsetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict,
- int32_t flags, dict_t *xdata)
-{
- dict_foreach_fnmatch(dict, "trusted.glusterfs.crypt*", filter_crypt_xattr,
- NULL);
- STACK_WIND(frame, default_fsetxattr_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fsetxattr, fd, dict, flags, xdata);
- return 0;
-}
-
-/*
- * TBD: verify file metadata before wind
- */
-static int32_t
-crypt_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict,
- int32_t flags, dict_t *xdata)
-{
- dict_foreach_fnmatch(dict, "trusted.glusterfs.crypt*", filter_crypt_xattr,
- NULL);
- STACK_WIND(frame, default_setxattr_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->setxattr, loc, dict, flags, xdata);
- return 0;
-}
-
-/*
- * called as flush_cbk()
- */
-static int32_t
-linkop_end(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
- int32_t op_errno, dict_t *xdata)
-{
- crypt_local_t *local = frame->local;
- linkop_unwind_handler_t unwind_fn;
- unwind_fn = linkop_unwind_dispatch(local->fop);
-
- local->op_ret = op_ret;
- local->op_errno = op_errno;
-
- if (op_ret < 0 && op_errno == ENOENT &&
- local->loc->inode->ia_type == IA_IFLNK) {
- local->op_ret = 0;
- local->op_errno = 0;
- }
- unwind_fn(frame);
- return 0;
-}
-
-/*
- * unpin inode on the server
- */
-static int32_t
-link_flush(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
- int32_t op_errno, inode_t *inode, struct iatt *buf,
- struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
-{
- crypt_local_t *local = frame->local;
-
- if (op_ret < 0)
- goto error;
- if (local->xdata) {
- dict_unref(local->xdata);
- local->xdata = NULL;
- }
- if (xdata)
- local->xdata = dict_ref(xdata);
- local->inode = inode_ref(inode);
- local->buf = *buf;
- local->prebuf = *preparent;
- local->postbuf = *postparent;
-
- STACK_WIND(frame, linkop_end, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->flush, local->fd, NULL);
- return 0;
-error:
- local->op_ret = -1;
- local->op_errno = op_errno;
- link_unwind(frame);
- return 0;
-}
-
-void
-link_unwind(call_frame_t *frame)
-{
- crypt_local_t *local = frame->local;
- dict_t *xdata;
- dict_t *xattr;
- inode_t *inode;
-
- if (!local) {
- CRYPT_STACK_UNWIND(link, frame, -1, ENOMEM, NULL, NULL, NULL, NULL,
- NULL);
- return;
- }
- xdata = local->xdata;
- xattr = local->xattr;
- inode = local->inode;
-
- if (local->loc) {
- loc_wipe(local->loc);
- GF_FREE(local->loc);
- }
- if (local->newloc) {
- loc_wipe(local->newloc);
- GF_FREE(local->newloc);
- }
- if (local->fd)
- fd_unref(local->fd);
- if (local->format)
- GF_FREE(local->format);
-
- CRYPT_STACK_UNWIND(link, frame, local->op_ret, local->op_errno, inode,
- &local->buf, &local->prebuf, &local->postbuf, xdata);
- if (xdata)
- dict_unref(xdata);
- if (xattr)
- dict_unref(xattr);
- if (inode)
- inode_unref(inode);
-}
-
-void
-link_wind(call_frame_t *frame, xlator_t *this)
-{
- crypt_local_t *local = frame->local;
-
- STACK_WIND(frame, link_flush, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->link, local->loc, local->newloc,
- local->xdata);
-}
-
-/*
- * unlink()
- */
-static int32_t
-unlink_flush(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
- int32_t op_errno, struct iatt *preparent, struct iatt *postparent,
- dict_t *xdata)
-{
- crypt_local_t *local = frame->local;
-
- if (op_ret < 0)
- goto error;
- local->prebuf = *preparent;
- local->postbuf = *postparent;
- if (local->xdata) {
- dict_unref(local->xdata);
- local->xdata = NULL;
- }
- if (xdata)
- local->xdata = dict_ref(xdata);
-
- STACK_WIND(frame, linkop_end, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->flush, local->fd, NULL);
- return 0;
-error:
- local->op_ret = -1;
- local->op_errno = op_errno;
- unlink_unwind(frame);
- return 0;
-}
-
-void
-unlink_unwind(call_frame_t *frame)
-{
- crypt_local_t *local = frame->local;
- dict_t *xdata;
- dict_t *xattr;
-
- if (!local) {
- CRYPT_STACK_UNWIND(unlink, frame, -1, ENOMEM, NULL, NULL, NULL);
- return;
- }
- xdata = local->xdata;
- xattr = local->xattr;
- if (local->loc) {
- loc_wipe(local->loc);
- GF_FREE(local->loc);
- }
- if (local->fd)
- fd_unref(local->fd);
- if (local->format)
- GF_FREE(local->format);
-
- CRYPT_STACK_UNWIND(unlink, frame, local->op_ret, local->op_errno,
- &local->prebuf, &local->postbuf, xdata);
- if (xdata)
- dict_unref(xdata);
- if (xattr)
- dict_unref(xattr);
-}
-
-void
-unlink_wind(call_frame_t *frame, xlator_t *this)
-{
- crypt_local_t *local = frame->local;
-
- STACK_WIND(frame, unlink_flush, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->unlink, local->loc, local->flags,
- local->xdata);
-}
-
-void
-rename_unwind(call_frame_t *frame)
-{
- crypt_local_t *local = frame->local;
- dict_t *xdata;
- dict_t *xattr;
- struct iatt *prenewparent;
- struct iatt *postnewparent;
-
- if (!local) {
- CRYPT_STACK_UNWIND(rename, frame, -1, ENOMEM, NULL, NULL, NULL, NULL,
- NULL, NULL);
- return;
- }
- xdata = local->xdata;
- xattr = local->xattr;
- prenewparent = local->prenewparent;
- postnewparent = local->postnewparent;
-
- if (local->loc) {
- loc_wipe(local->loc);
- GF_FREE(local->loc);
- }
- if (local->newloc) {
- loc_wipe(local->newloc);
- GF_FREE(local->newloc);
- }
- if (local->fd)
- fd_unref(local->fd);
- if (local->format)
- GF_FREE(local->format);
-
- CRYPT_STACK_UNWIND(rename, frame, local->op_ret, local->op_errno,
- &local->buf, &local->prebuf, &local->postbuf,
- prenewparent, postnewparent, xdata);
- if (xdata)
- dict_unref(xdata);
- if (xattr)
- dict_unref(xattr);
- if (prenewparent)
- GF_FREE(prenewparent);
- if (postnewparent)
- GF_FREE(postnewparent);
-}
-
-/*
- * called as flush_cbk()
- */
-static int32_t
-rename_end(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
- int32_t op_errno, dict_t *xdata)
-{
- crypt_local_t *local = frame->local;
-
- local->op_ret = op_ret;
- local->op_errno = op_errno;
-
- rename_unwind(frame);
- return 0;
-}
-
-static int32_t
-rename_flush(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
- int32_t op_errno, struct iatt *buf, struct iatt *preoldparent,
- struct iatt *postoldparent, struct iatt *prenewparent,
- struct iatt *postnewparent, dict_t *xdata)
-{
- crypt_local_t *local = frame->local;
-
- if (op_ret < 0)
- goto error;
- dict_unref(local->xdata);
- local->xdata = NULL;
- if (xdata)
- local->xdata = dict_ref(xdata);
-
- local->buf = *buf;
- local->prebuf = *preoldparent;
- local->postbuf = *postoldparent;
- if (prenewparent) {
- local->prenewparent = GF_CALLOC(1, sizeof(*prenewparent),
- gf_crypt_mt_iatt);
- if (!local->prenewparent) {
- op_errno = ENOMEM;
- goto error;
- }
- *local->prenewparent = *prenewparent;
- }
- if (postnewparent) {
- local->postnewparent = GF_CALLOC(1, sizeof(*postnewparent),
- gf_crypt_mt_iatt);
- if (!local->postnewparent) {
- op_errno = ENOMEM;
- goto error;
- }
- *local->postnewparent = *postnewparent;
- }
- STACK_WIND(frame, rename_end, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->flush, local->fd, NULL);
- return 0;
-error:
- local->op_ret = -1;
- local->op_errno = op_errno;
- rename_unwind(frame);
- return 0;
-}
-
-void
-rename_wind(call_frame_t *frame, xlator_t *this)
-{
- crypt_local_t *local = frame->local;
-
- STACK_WIND(frame, rename_flush, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->rename, local->loc, local->newloc,
- local->xdata);
-}
-
-static int32_t
-__do_linkop(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
- int32_t op_errno, dict_t *xdata)
-{
- crypt_local_t *local = frame->local;
- linkop_wind_handler_t wind_fn;
- linkop_unwind_handler_t unwind_fn;
-
- wind_fn = linkop_wind_dispatch(local->fop);
- unwind_fn = linkop_unwind_dispatch(local->fop);
-
- local->op_ret = op_ret;
- local->op_errno = op_errno;
-
- if (op_ret >= 0)
- wind_fn(frame, this);
- else {
- gf_log(this->name, GF_LOG_WARNING, "mtd unlock failed (%d)", op_errno);
- unwind_fn(frame);
- }
- return 0;
-}
-
-static int32_t
-do_linkop(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
- int32_t op_errno, dict_t *xdata)
-{
- struct gf_flock lock = {
- 0,
- };
- crypt_local_t *local = frame->local;
- linkop_unwind_handler_t unwind_fn;
-
- unwind_fn = linkop_unwind_dispatch(local->fop);
- local->op_ret = op_ret;
- local->op_errno = op_errno;
-
- if (op_ret < 0)
- goto error;
-
- lock.l_type = F_UNLCK;
- lock.l_whence = SEEK_SET;
- lock.l_start = 0;
- lock.l_len = 0;
- lock.l_pid = 0;
-
- STACK_WIND(frame, __do_linkop, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->finodelk, this->name, local->fd,
- F_SETLKW, &lock, NULL);
- return 0;
-error:
- unwind_fn(frame);
- return 0;
-}
-
-/*
- * Update the metadata string (against the new pathname);
- * submit the result
- */
-static int32_t
-linkop_begin(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
- int32_t op_errno, fd_t *fd, dict_t *xdata)
-{
- gf_boolean_t upload_info;
- crypt_local_t *local = frame->local;
- crypt_private_t *priv = this->private;
- struct crypt_inode_info *info;
- data_t *old_mtd;
- uint32_t new_mtd_size;
- uint64_t value = 0;
- void (*unwind_fn)(call_frame_t * frame);
- mtd_op_t mop;
-
- unwind_fn = linkop_unwind_dispatch(local->fop);
- mop = linkop_mtdop_dispatch(local->fop);
-
- if (op_ret < 0) {
- /*
- * verification failed
- */
- goto error;
- } else {
- fd_bind(fd);
- }
-
- old_mtd = dict_get(xdata, CRYPTO_FORMAT_PREFIX);
- if (!old_mtd) {
- op_errno = EIO;
- gf_log(this->name, GF_LOG_DEBUG, "Metadata string wasn't found");
- goto error;
- }
- new_mtd_size = format_size(mop, old_mtd->len);
- op_errno = alloc_format(local, new_mtd_size);
- if (op_errno)
- goto error;
- /*
- * check for cached info
- */
- op_ret = inode_ctx_get(fd->inode, this, &value);
- if (op_ret != -1) {
- info = (struct crypt_inode_info *)(long)value;
- if (info == NULL) {
- gf_log(this->name, GF_LOG_WARNING, "Inode info was not found");
- op_errno = EINVAL;
- goto error;
- }
- /*
- * info was found in the cache
- */
- local->info = info;
- upload_info = _gf_false;
- } else {
- /*
- * info wasn't found in the cache;
- */
- info = alloc_inode_info(local, local->loc);
- if (!info)
- goto error;
- init_inode_info_head(info, fd);
- local->info = info;
- upload_info = _gf_true;
- }
- op_errno = open_format((unsigned char *)old_mtd->data, old_mtd->len,
- local->loc, info, get_master_cinfo(priv), local,
- upload_info);
- if (op_errno)
- goto error;
- if (upload_info == _gf_true) {
- op_errno = init_inode_info_tail(info, get_master_cinfo(priv));
- if (op_errno)
- goto error;
- op_errno = inode_ctx_put(fd->inode, this, (uint64_t)(long)(info));
- if (op_errno == -1) {
- op_errno = EIO;
- goto error;
- }
- }
- /*
- * update the format string (append/update/cup a MAC)
- */
- op_errno = update_format(local->format, (unsigned char *)old_mtd->data,
- old_mtd->len, local->mac_idx, mop, local->newloc,
- info, get_master_cinfo(priv), local);
- if (op_errno)
- goto error;
- /*
- * store the new format string on the server
- */
- if (new_mtd_size) {
- op_errno = dict_set_static_bin(local->xattr, CRYPTO_FORMAT_PREFIX,
- local->format, new_mtd_size);
- if (op_errno)
- goto error;
- }
- STACK_WIND(frame, do_linkop, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->setxattr, local->loc, local->xattr, 0,
- NULL);
- return 0;
-error:
- local->op_ret = -1;
- local->op_errno = op_errno;
- unwind_fn(frame);
- return 0;
-}
-
-static int32_t
-linkop_grab_local(call_frame_t *frame, xlator_t *this, loc_t *oldloc,
- loc_t *newloc, int flags, dict_t *xdata, glusterfs_fop_t op)
-{
- int32_t ret = ENOMEM;
- fd_t *fd;
- crypt_local_t *local;
-
- local = crypt_alloc_local(frame, this, op);
- if (!local)
- goto error;
- if (xdata)
- local->xdata = dict_ref(xdata);
-
- fd = fd_create(oldloc->inode, frame->root->pid);
- if (!fd) {
- gf_log(this->name, GF_LOG_ERROR, "Can not create fd");
- goto error;
- }
- local->fd = fd;
- local->flags = flags;
- local->loc = GF_CALLOC(1, sizeof(loc_t), gf_crypt_mt_loc);
- if (!local->loc)
- goto error;
- ret = loc_copy(local->loc, oldloc);
- if (ret) {
- GF_FREE(local->loc);
- local->loc = NULL;
- goto error;
- }
- if (newloc) {
- local->newloc = GF_CALLOC(1, sizeof(loc_t), gf_crypt_mt_loc);
- if (!local->newloc) {
- loc_wipe(local->loc);
- GF_FREE(local->loc);
- goto error;
- }
- ret = loc_copy(local->newloc, newloc);
- if (ret) {
- loc_wipe(local->loc);
- GF_FREE(local->loc);
- GF_FREE(local->newloc);
- goto error;
- }
- }
- local->xattr = dict_new();
- if (!local->xattr) {
- gf_log(this->name, GF_LOG_ERROR, "Can not create dict");
- ret = ENOMEM;
- goto error;
- }
- return 0;
-
-error:
- if (local) {
- if (local->xdata)
- dict_unref(local->xdata);
- if (local->fd)
- fd_unref(local->fd);
- local->fd = 0;
- local->loc = NULL;
- local->newloc = NULL;
- local->op_ret = -1;
- local->op_errno = ret;
- }
-
- return ret;
-}
-
-/*
- * read and verify locked metadata against the old pathname (via open);
- * update the metadata string in accordance with the new pathname;
- * submit modified metadata;
- * wind;
- */
-static int32_t
-linkop(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
- int flags, dict_t *xdata, glusterfs_fop_t op)
-{
- int32_t ret;
- dict_t *dict;
- crypt_local_t *local;
- void (*unwind_fn)(call_frame_t * frame);
- void (*wind_fn)(call_frame_t * frame, xlator_t * this);
-
- wind_fn = linkop_wind_dispatch(op);
- unwind_fn = linkop_unwind_dispatch(op);
-
- ret = linkop_grab_local(frame, this, oldloc, newloc, flags, xdata, op);
- local = frame->local;
- if (ret)
- goto error;
-
- if (local->fd->inode->ia_type == IA_IFLNK)
- goto wind;
-
- dict = dict_new();
- if (!dict) {
- gf_log(this->name, GF_LOG_ERROR, "Can not create dict");
- ret = ENOMEM;
- goto error;
- }
- /*
- * Set a message to crypt_open() that we need
- * locked metadata string.
- * All link operations (link, unlink, rename)
- * need write lock
- */
- msgflags_set_mtd_wlock(&local->msgflags);
- ret = dict_set_static_bin(dict, MSGFLAGS_PREFIX, &local->msgflags,
- sizeof(local->msgflags));
- if (ret) {
- gf_log(this->name, GF_LOG_ERROR, "Can not set dict");
- dict_unref(dict);
- goto error;
- }
- /*
- * verify metadata against the old pathname
- * and retrieve locked metadata string
- */
- STACK_WIND(frame, linkop_begin, this, this->fops->open, /* crypt_open() */
- oldloc, O_RDWR, local->fd, dict);
- dict_unref(dict);
- return 0;
-
-wind:
- wind_fn(frame, this);
- return 0;
-
-error:
- local->op_ret = -1;
- local->op_errno = ret;
- unwind_fn(frame);
- return 0;
-}
-
-static int32_t
-crypt_link(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
- dict_t *xdata)
-{
- return linkop(frame, this, oldloc, newloc, 0, xdata, GF_FOP_LINK);
-}
-
-static int32_t
-crypt_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags,
- dict_t *xdata)
-{
- return linkop(frame, this, loc, NULL, flags, xdata, GF_FOP_UNLINK);
-}
-
-static int32_t
-crypt_rename(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
- dict_t *xdata)
-{
- return linkop(frame, this, oldloc, newloc, 0, xdata, GF_FOP_RENAME);
-}
-
-static void
-put_one_call_open(call_frame_t *frame)
-{
- crypt_local_t *local = frame->local;
- if (put_one_call(local)) {
- fd_t *fd = local->fd;
- loc_t *loc = local->loc;
- dict_t *xdata = local->xdata;
-
- CRYPT_STACK_UNWIND(open, frame, local->op_ret, local->op_errno, fd,
- xdata);
- fd_unref(fd);
- if (xdata)
- dict_unref(xdata);
- loc_wipe(loc);
- GF_FREE(loc);
- }
-}
-
-static int32_t
-__crypt_readv_done(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
-{
- crypt_local_t *local = frame->local;
- fd_t *local_fd = local->fd;
- dict_t *local_xdata = local->xdata;
- /* read deals with data configs only */
- struct iovec *avec = local->data_conf.avec;
- char **pool = local->data_conf.pool;
- int blocks_in_pool = local->data_conf.blocks_in_pool;
- struct iobref *iobref = local->iobref;
- struct iobref *iobref_data = local->iobref_data;
-
- if (op_ret < 0) {
- gf_log(this->name, GF_LOG_WARNING, "readv unlock failed (%d)",
- op_errno);
- if (local->op_ret >= 0) {
- local->op_ret = op_ret;
- local->op_errno = op_errno;
- }
- }
- dump_plain_text(local, avec);
-
- gf_log("crypt", GF_LOG_DEBUG,
- "readv: ret_to_user: %d, iovec len: %d, ia_size: %llu",
- (int)(local->rw_count > 0 ? local->rw_count : local->op_ret),
- (int)(local->rw_count > 0 ? iov_length(avec, local->data_conf.acount)
- : 0),
- (unsigned long long)local->buf.ia_size);
-
- CRYPT_STACK_UNWIND(
- readv, frame, local->rw_count > 0 ? local->rw_count : local->op_ret,
- local->op_errno, avec, avec ? local->data_conf.acount : 0, &local->buf,
- local->iobref, local_xdata);
-
- free_avec(avec, pool, blocks_in_pool);
- fd_unref(local_fd);
- if (local_xdata)
- dict_unref(local_xdata);
- if (iobref)
- iobref_unref(iobref);
- if (iobref_data)
- iobref_unref(iobref_data);
- return 0;
-}
-
-static void
-crypt_readv_done(call_frame_t *frame, xlator_t *this)
-{
- if (parent_is_crypt_xlator(frame, this))
- /*
- * don't unlock (it will be done by the parent)
- */
- __crypt_readv_done(frame, NULL, this, 0, 0, NULL);
- else {
- crypt_local_t *local = frame->local;
- struct gf_flock lock = {
- 0,
- };
-
- lock.l_type = F_UNLCK;
- lock.l_whence = SEEK_SET;
- lock.l_start = 0;
- lock.l_len = 0;
- lock.l_pid = 0;
-
- STACK_WIND(frame, __crypt_readv_done, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->finodelk, this->name, local->fd,
- F_SETLKW, &lock, NULL);
- }
-}
-
-static void
-put_one_call_readv(call_frame_t *frame, xlator_t *this)
-{
- crypt_local_t *local = frame->local;
- if (put_one_call(local))
- crypt_readv_done(frame, this);
-}
-
-static int32_t
-__crypt_writev_done(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
-{
- crypt_local_t *local = frame->local;
- fd_t *local_fd = local->fd;
- dict_t *local_xdata = local->xdata;
- int32_t ret_to_user;
-
- if (local->xattr)
- dict_unref(local->xattr);
- /*
- * Calculate amount of butes to be returned
- * to user. We need to subtract paddings that
- * have been written as a part of atom.
- */
- /*
- * subtract head padding
- */
- if (local->rw_count == 0)
- /*
- * Nothing has been written, it must be an error
- */
- ret_to_user = local->op_ret;
- else if (local->rw_count <= local->data_conf.off_in_head) {
- gf_log("crypt", GF_LOG_WARNING, "Incomplete write");
- ret_to_user = 0;
- } else
- ret_to_user = local->rw_count - local->data_conf.off_in_head;
- /*
- * subtract tail padding
- */
- if (ret_to_user > local->data_conf.orig_size)
- ret_to_user = local->data_conf.orig_size;
-
- if (local->iobref)
- iobref_unref(local->iobref);
- if (local->iobref_data)
- iobref_unref(local->iobref_data);
- free_avec_data(local);
- free_avec_hole(local);
-
- gf_log("crypt", GF_LOG_DEBUG, "writev: ret_to_user: %d", ret_to_user);
-
- CRYPT_STACK_UNWIND(writev, frame, ret_to_user, local->op_errno,
- &local->prebuf, &local->postbuf, local_xdata);
- fd_unref(local_fd);
- if (local_xdata)
- dict_unref(local_xdata);
- return 0;
-}
-
-static int32_t
-crypt_writev_done(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
-{
- crypt_local_t *local = frame->local;
-
- if (op_ret < 0)
- gf_log("crypt", GF_LOG_WARNING, "can not update file size");
-
- if (parent_is_crypt_xlator(frame, this))
- /*
- * don't unlock (it will be done by the parent)
- */
- __crypt_writev_done(frame, NULL, this, 0, 0, NULL);
- else {
- struct gf_flock lock = {
- 0,
- };
-
- lock.l_type = F_UNLCK;
- lock.l_whence = SEEK_SET;
- lock.l_start = 0;
- lock.l_len = 0;
- lock.l_pid = 0;
-
- STACK_WIND(frame, __crypt_writev_done, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->finodelk, this->name, local->fd,
- F_SETLKW, &lock, NULL);
- }
- return 0;
-}
-
-static void
-put_one_call_writev(call_frame_t *frame, xlator_t *this)
-{
- crypt_local_t *local = frame->local;
- if (put_one_call(local)) {
- if (local->update_disk_file_size) {
- int32_t ret;
- /*
- * update file size, unlock the file and unwind
- */
- ret = dict_set(local->xattr, FSIZE_XATTR_PREFIX,
- data_from_uint64(local->cur_file_size));
- if (ret) {
- gf_log("crypt", GF_LOG_WARNING,
- "can not set key to update file size");
- crypt_writev_done(frame, NULL, this, 0, 0, NULL);
- return;
- }
- gf_log("crypt", GF_LOG_DEBUG, "Updating disk file size to %llu",
- (unsigned long long)local->cur_file_size);
- STACK_WIND(frame, crypt_writev_done, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fsetxattr, local->fd,
- local->xattr, /* CRYPTO_FORMAT_PREFIX */
- 0, NULL);
- } else
- crypt_writev_done(frame, NULL, this, 0, 0, NULL);
- }
-}
-
-static int32_t
-__crypt_ftruncate_done(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
-{
- crypt_local_t *local = frame->local;
- fd_t *local_fd = local->fd;
- dict_t *local_xdata = local->xdata;
- char *iobase = local->vec.iov_base;
-
- if (op_ret < 0) {
- gf_log(this->name, GF_LOG_WARNING, "ftruncate unlock failed (%d)",
- op_errno);
- if (local->op_ret >= 0) {
- local->op_ret = op_ret;
- local->op_errno = op_errno;
- }
- }
- if (local->iobref_data)
- iobref_unref(local->iobref_data);
- free_avec_data(local);
- free_avec_hole(local);
-
- gf_log("crypt", GF_LOG_DEBUG,
- "ftruncate, return to user: presize=%llu, postsize=%llu",
- (unsigned long long)local->prebuf.ia_size,
- (unsigned long long)local->postbuf.ia_size);
-
- CRYPT_STACK_UNWIND(ftruncate, frame, ((local->op_ret < 0) ? -1 : 0),
- local->op_errno, &local->prebuf, &local->postbuf,
- local_xdata);
- fd_unref(local_fd);
- if (local_xdata)
- dict_unref(local_xdata);
- if (iobase)
- GF_FREE(iobase);
- return 0;
-}
-
-static int32_t
-crypt_ftruncate_done(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
-{
- crypt_local_t *local = frame->local;
- struct gf_flock lock = {
- 0,
- };
-
- dict_unref(local->xattr);
- if (op_ret < 0)
- gf_log("crypt", GF_LOG_WARNING, "can not update file size");
-
- lock.l_type = F_UNLCK;
- lock.l_whence = SEEK_SET;
- lock.l_start = 0;
- lock.l_len = 0;
- lock.l_pid = 0;
-
- STACK_WIND(frame, __crypt_ftruncate_done, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->finodelk, this->name, local->fd,
- F_SETLKW, &lock, NULL);
- return 0;
-}
-
-static void
-put_one_call_ftruncate(call_frame_t *frame, xlator_t *this)
-{
- crypt_local_t *local = frame->local;
- if (put_one_call(local)) {
- if (local->update_disk_file_size) {
- int32_t ret;
- /*
- * update file size, unlock the file and unwind
- */
- ret = dict_set(local->xattr, FSIZE_XATTR_PREFIX,
- data_from_uint64(local->cur_file_size));
- if (ret) {
- gf_log("crypt", GF_LOG_WARNING,
- "can not set key to update file size");
- crypt_ftruncate_done(frame, NULL, this, 0, 0, NULL);
- return;
- }
- gf_log("crypt", GF_LOG_DEBUG, "Updating disk file size to %llu",
- (unsigned long long)local->cur_file_size);
- STACK_WIND(frame, crypt_ftruncate_done, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fsetxattr, local->fd,
- local->xattr, /* CRYPTO_FORMAT_PREFIX */
- 0, NULL);
- } else
- crypt_ftruncate_done(frame, NULL, this, 0, 0, NULL);
- }
-}
-
-/*
- * load regular file size for some FOPs
- */
-static int32_t
-load_file_size(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata)
-{
- data_t *data;
- crypt_local_t *local = frame->local;
-
- dict_t *local_xdata = local->xdata;
- inode_t *local_inode = local->inode;
-
- if (op_ret < 0)
- goto unwind;
- /*
- * load regular file size
- */
- data = dict_get(dict, FSIZE_XATTR_PREFIX);
- if (!data) {
- if (local->xdata)
- dict_unref(local->xdata);
- gf_log("crypt", GF_LOG_WARNING, "Regular file size not found");
- op_ret = -1;
- op_errno = EIO;
- goto unwind;
- }
- local->buf.ia_size = data_to_uint64(data);
-
- gf_log(this->name, GF_LOG_DEBUG, "FOP %d: Translate regular file to %llu",
- local->fop, (unsigned long long)local->buf.ia_size);
-unwind:
- if (local->fd)
- fd_unref(local->fd);
- if (local->loc) {
- loc_wipe(local->loc);
- GF_FREE(local->loc);
- }
- switch (local->fop) {
- case GF_FOP_FSTAT:
- CRYPT_STACK_UNWIND(fstat, frame, op_ret, op_errno,
- op_ret >= 0 ? &local->buf : NULL, local->xdata);
- break;
- case GF_FOP_STAT:
- CRYPT_STACK_UNWIND(stat, frame, op_ret, op_errno,
- op_ret >= 0 ? &local->buf : NULL, local->xdata);
- break;
- case GF_FOP_LOOKUP:
- CRYPT_STACK_UNWIND(lookup, frame, op_ret, op_errno,
- op_ret >= 0 ? local->inode : NULL,
- op_ret >= 0 ? &local->buf : NULL, local->xdata,
- op_ret >= 0 ? &local->postbuf : NULL);
- break;
- case GF_FOP_READ:
- CRYPT_STACK_UNWIND(readv, frame, op_ret, op_errno, NULL, 0,
- op_ret >= 0 ? &local->buf : NULL, NULL, NULL);
- break;
- default:
- gf_log(this->name, GF_LOG_WARNING, "Improper file operation %d",
- local->fop);
- }
- if (local_xdata)
- dict_unref(local_xdata);
- if (local_inode)
- inode_unref(local_inode);
- return 0;
-}
-
-static int32_t
-crypt_stat_common_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *buf,
- dict_t *xdata)
-{
- crypt_local_t *local = frame->local;
-
- if (op_ret < 0)
- goto unwind;
- if (!IA_ISREG(buf->ia_type))
- goto unwind;
-
- local->buf = *buf;
- if (xdata)
- local->xdata = dict_ref(xdata);
-
- switch (local->fop) {
- case GF_FOP_FSTAT:
- STACK_WIND(frame, load_file_size, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fgetxattr, local->fd,
- FSIZE_XATTR_PREFIX, NULL);
- break;
- case GF_FOP_STAT:
- STACK_WIND(frame, load_file_size, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->getxattr, local->loc,
- FSIZE_XATTR_PREFIX, NULL);
- break;
- default:
- gf_log(this->name, GF_LOG_WARNING, "Improper file operation %d",
- local->fop);
- }
- return 0;
-unwind:
- if (local->fd)
- fd_unref(local->fd);
- if (local->loc) {
- loc_wipe(local->loc);
- GF_FREE(local->loc);
- }
- switch (local->fop) {
- case GF_FOP_FSTAT:
- CRYPT_STACK_UNWIND(fstat, frame, op_ret, op_errno,
- op_ret >= 0 ? buf : NULL,
- op_ret >= 0 ? xdata : NULL);
- break;
- case GF_FOP_STAT:
- CRYPT_STACK_UNWIND(stat, frame, op_ret, op_errno,
- op_ret >= 0 ? buf : NULL,
- op_ret >= 0 ? xdata : NULL);
- break;
- default:
- gf_log(this->name, GF_LOG_WARNING, "Improper file operation %d",
- local->fop);
- }
- return 0;
-}
-
-static int32_t
-crypt_fstat(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
-{
- crypt_local_t *local;
-
- local = crypt_alloc_local(frame, this, GF_FOP_FSTAT);
- if (!local)
- goto error;
- local->fd = fd_ref(fd);
- STACK_WIND(frame, crypt_stat_common_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fstat, fd, xdata);
- return 0;
-error:
- CRYPT_STACK_UNWIND(fstat, frame, -1, ENOMEM, NULL, NULL);
- return 0;
-}
-
-static int32_t
-crypt_stat(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
-{
- int32_t ret;
- crypt_local_t *local;
-
- local = crypt_alloc_local(frame, this, GF_FOP_STAT);
- if (!local)
- goto error;
- local->loc = GF_CALLOC(1, sizeof(loc_t), gf_crypt_mt_loc);
- if (!local->loc)
- goto error;
- ret = loc_copy(local->loc, loc);
- if (ret) {
- GF_FREE(local->loc);
- goto error;
- }
- STACK_WIND(frame, crypt_stat_common_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->stat, loc, xdata);
- return 0;
-error:
- CRYPT_STACK_UNWIND(stat, frame, -1, ENOMEM, NULL, NULL);
- return 0;
-}
-
-static int32_t
-crypt_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct iatt *buf, dict_t *xdata, struct iatt *postparent)
-{
- crypt_local_t *local = frame->local;
-
- if (op_ret < 0)
- goto unwind;
- if (!IA_ISREG(buf->ia_type))
- goto unwind;
-
- local->inode = inode_ref(inode);
- local->buf = *buf;
- local->postbuf = *postparent;
- if (xdata)
- local->xdata = dict_ref(xdata);
- gf_uuid_copy(local->loc->gfid, buf->ia_gfid);
-
- STACK_WIND(frame, load_file_size, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->getxattr, local->loc,
- FSIZE_XATTR_PREFIX, NULL);
- return 0;
-unwind:
- loc_wipe(local->loc);
- GF_FREE(local->loc);
- CRYPT_STACK_UNWIND(lookup, frame, op_ret, op_errno, inode, buf, xdata,
- postparent);
- return 0;
-}
-
-static int32_t
-crypt_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
-{
- int32_t ret;
- crypt_local_t *local;
-
- local = crypt_alloc_local(frame, this, GF_FOP_LOOKUP);
- if (!local)
- goto error;
- local->loc = GF_CALLOC(1, sizeof(loc_t), gf_crypt_mt_loc);
- if (!local->loc)
- goto error;
- ret = loc_copy(local->loc, loc);
- if (ret) {
- GF_FREE(local->loc);
- goto error;
- }
- gf_log(this->name, GF_LOG_DEBUG, "Lookup %s", loc->path);
- STACK_WIND(frame, crypt_lookup_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->lookup, loc, xdata);
- return 0;
-error:
- CRYPT_STACK_UNWIND(lookup, frame, -1, ENOMEM, NULL, NULL, NULL, NULL);
- return 0;
-}
-
-/*
- * for every regular directory entry find its real file size
- * and update stat's buf properly
- */
-static int32_t
-crypt_readdirp_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, gf_dirent_t *entries,
- dict_t *xdata)
-{
- gf_dirent_t *entry = NULL;
-
- if (op_ret < 0)
- goto unwind;
-
- list_for_each_entry(entry, (&entries->list), list)
- {
- data_t *data;
-
- if (!IA_ISREG(entry->d_stat.ia_type))
- continue;
- data = dict_get(entry->dict, FSIZE_XATTR_PREFIX);
- if (!data) {
- gf_log("crypt", GF_LOG_WARNING,
- "Regular file size of direntry not found");
- op_errno = EIO;
- op_ret = -1;
- break;
- }
- entry->d_stat.ia_size = data_to_uint64(data);
- }
-unwind:
- CRYPT_STACK_UNWIND(readdirp, frame, op_ret, op_errno, entries, xdata);
- return 0;
-}
-
-/*
- * ->readdirp() fills in-core inodes, so we need to set proper
- * file sizes for all directory entries of the parent @fd.
- * Actual updates take place in ->crypt_readdirp_cbk()
- */
-static int32_t
-crypt_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
- off_t offset, dict_t *xdata)
-{
- int32_t ret = ENOMEM;
-
- if (!xdata) {
- xdata = dict_new();
- if (!xdata)
- goto error;
- } else
- dict_ref(xdata);
- /*
- * make sure that we'll have real file sizes at ->readdirp_cbk()
- */
- ret = dict_set(xdata, FSIZE_XATTR_PREFIX, data_from_uint64(0));
- if (ret) {
- dict_unref(xdata);
- ret = ENOMEM;
- goto error;
- }
- STACK_WIND(frame, crypt_readdirp_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->readdirp, fd, size, offset, xdata);
- dict_unref(xdata);
- return 0;
-error:
- CRYPT_STACK_UNWIND(readdirp, frame, -1, ret, NULL, NULL);
- return 0;
-}
-
-static int32_t
-crypt_access(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t mask,
- dict_t *xdata)
-{
- gf_log(this->name, GF_LOG_WARNING,
- "NFS mounts of encrypted volumes are unsupported");
- CRYPT_STACK_UNWIND(access, frame, -1, EPERM, NULL);
- return 0;
-}
-
-int32_t
-master_set_block_size(xlator_t *this, crypt_private_t *priv, dict_t *options)
-{
- uint64_t block_size = 0;
- struct master_cipher_info *master = get_master_cinfo(priv);
-
- if (options != NULL)
- GF_OPTION_RECONF("block-size", block_size, options, size_uint64, error);
- else
- GF_OPTION_INIT("block-size", block_size, size_uint64, error);
-
- switch (block_size) {
- case 512:
- master->m_block_bits = 9;
- break;
- case 1024:
- master->m_block_bits = 10;
- break;
- case 2048:
- master->m_block_bits = 11;
- break;
- case 4096:
- master->m_block_bits = 12;
- break;
- default:
- gf_log("crypt", GF_LOG_ERROR, "FATAL: unsupported block size %llu",
- (unsigned long long)block_size);
- goto error;
- }
- return 0;
-error:
- return -1;
-}
-
-int32_t
-master_set_alg(xlator_t *this, crypt_private_t *priv)
-{
- struct master_cipher_info *master = get_master_cinfo(priv);
- master->m_alg = AES_CIPHER_ALG;
- return 0;
-}
-
-int32_t
-master_set_mode(xlator_t *this, crypt_private_t *priv)
-{
- struct master_cipher_info *master = get_master_cinfo(priv);
- master->m_mode = XTS_CIPHER_MODE;
- return 0;
-}
-
-/*
- * set key size in bits to the master info
- * Pre-conditions: cipher mode in the master info is uptodate.
- */
-static int
-master_set_data_key_size(xlator_t *this, crypt_private_t *priv, dict_t *options)
-{
- int32_t ret;
- uint64_t key_size = 0;
- struct master_cipher_info *master = get_master_cinfo(priv);
-
- if (options != NULL)
- GF_OPTION_RECONF("data-key-size", key_size, options, uint64, error);
- else
- GF_OPTION_INIT("data-key-size", key_size, uint64, error);
-
- ret = data_cipher_algs[master->m_alg][master->m_mode].check_key(key_size);
- if (ret) {
- gf_log("crypt", GF_LOG_ERROR,
- "FATAL: wrong bin key size %llu for alg %d mode %d",
- (unsigned long long)key_size, (int)master->m_alg,
- (int)master->m_mode);
- goto error;
- }
- master->m_dkey_size = key_size;
- return 0;
-error:
- return -1;
-}
-
-static int
-is_hex(char *s)
-{
- return ('0' <= *s && *s <= '9') || ('a' <= *s && *s <= 'f');
-}
-
-static int
-parse_hex_buf(xlator_t *this, char *src, unsigned char *dst, int hex_size)
-{
- int i;
- int hex_byte = 0;
-
- for (i = 0; i < (hex_size / 2); i++) {
- if (!is_hex(src + i * 2) || !is_hex(src + i * 2 + 1)) {
- gf_log("crypt", GF_LOG_ERROR, "FATAL: not hex symbol in key");
- return -1;
- }
- if (sscanf(src + i * 2, "%2x", &hex_byte) != 1) {
- gf_log("crypt", GF_LOG_ERROR, "FATAL: can not parse hex key");
- return -1;
- }
- dst[i] = hex_byte & 0xff;
- }
- return 0;
-}
-
-/*
- * Parse options;
- * install master volume key
- */
-int32_t
-master_set_master_vol_key(xlator_t *this, crypt_private_t *priv)
-{
- int32_t ret;
- FILE *file = NULL;
-
- int32_t key_size;
- char *opt_key_file_pathname = NULL;
-
- unsigned char bin_buf[MASTER_VOL_KEY_SIZE];
- char hex_buf[2 * MASTER_VOL_KEY_SIZE];
-
- struct master_cipher_info *master = get_master_cinfo(priv);
- /*
- * extract master key passed via option
- */
- GF_OPTION_INIT("master-key", opt_key_file_pathname, path, bad_key);
-
- if (!opt_key_file_pathname) {
- gf_log(this->name, GF_LOG_ERROR, "FATAL: missing master key");
- return -1;
- }
- gf_log(this->name, GF_LOG_DEBUG, "handling file key %s",
- opt_key_file_pathname);
-
- file = fopen(opt_key_file_pathname, "r");
- if (file == NULL) {
- gf_log(this->name, GF_LOG_ERROR,
- "FATAL: can not open file with master key");
- return -1;
- }
- /*
- * extract hex key
- */
- key_size = fread(hex_buf, 1, sizeof(hex_buf), file);
- if (key_size < sizeof(hex_buf)) {
- gf_log(this->name, GF_LOG_ERROR, "FATAL: master key is too short");
- goto bad_key;
- }
- ret = parse_hex_buf(this, hex_buf, bin_buf, key_size);
- if (ret)
- goto bad_key;
- memcpy(master->m_key, bin_buf, MASTER_VOL_KEY_SIZE);
- memset(hex_buf, 0, sizeof(hex_buf));
- fclose(file);
-
- memset(bin_buf, 0, sizeof(bin_buf));
- return 0;
-bad_key:
- gf_log(this->name, GF_LOG_ERROR, "FATAL: bad master key");
- if (file)
- fclose(file);
- memset(bin_buf, 0, sizeof(bin_buf));
- return -1;
-}
-
-/*
- * Derive volume key for object-id authentication
- */
-int32_t
-master_set_nmtd_vol_key(xlator_t *this, crypt_private_t *priv)
-{
- return get_nmtd_vol_key(get_master_cinfo(priv));
-}
-
-int32_t
-crypt_init_xlator(xlator_t *this)
-{
- int32_t ret;
- crypt_private_t *priv = this->private;
-
- ret = master_set_alg(this, priv);
- if (ret)
- return ret;
- ret = master_set_mode(this, priv);
- if (ret)
- return ret;
- ret = master_set_block_size(this, priv, NULL);
- if (ret)
- return ret;
- ret = master_set_data_key_size(this, priv, NULL);
- if (ret)
- return ret;
- ret = master_set_master_vol_key(this, priv);
- if (ret)
- return ret;
- return master_set_nmtd_vol_key(this, priv);
-}
-
-static int32_t
-crypt_alloc_private(xlator_t *this)
-{
- this->private = GF_CALLOC(1, sizeof(crypt_private_t), gf_crypt_mt_priv);
- if (!this->private) {
- gf_log("crypt", GF_LOG_ERROR,
- "Can not allocate memory for private data");
- return ENOMEM;
- }
- return 0;
-}
-
-static void
-crypt_free_private(xlator_t *this)
-{
- crypt_private_t *priv = this->private;
- if (priv) {
- memset(priv, 0, sizeof(*priv));
- GF_FREE(priv);
- }
-}
-
-int32_t
-mem_acct_init(xlator_t *this)
-{
- int ret = -1;
-
- if (!this)
- return ret;
-
- ret = xlator_mem_acct_init(this, gf_crypt_mt_end);
-
- if (ret != 0) {
- gf_log(this->name, GF_LOG_ERROR,
- "Memory accounting init"
- "failed");
- return ret;
- }
-
- return ret;
-}
-
-int32_t
-reconfigure(xlator_t *this, dict_t *options)
-{
- int32_t ret = -1;
- crypt_private_t *priv = NULL;
-
- GF_VALIDATE_OR_GOTO("crypt", this, error);
- GF_VALIDATE_OR_GOTO(this->name, this->private, error);
- GF_VALIDATE_OR_GOTO(this->name, options, error);
-
- priv = this->private;
-
- ret = master_set_block_size(this, priv, options);
- if (ret) {
- gf_log("this->name", GF_LOG_ERROR, "Failed to reconfure block size");
- goto error;
- }
- ret = master_set_data_key_size(this, priv, options);
- if (ret) {
- gf_log("this->name", GF_LOG_ERROR, "Failed to reconfure data key size");
- goto error;
- }
- return 0;
-error:
- return ret;
-}
-
-int32_t
-init(xlator_t *this)
-{
- int32_t ret;
-
- if (!this->children || this->children->next) {
- gf_log("crypt", GF_LOG_ERROR,
- "FATAL: crypt should have exactly one child");
- return EINVAL;
- }
- if (!this->parents) {
- gf_log(this->name, GF_LOG_WARNING, "dangling volume. check volfile ");
- }
- ret = crypt_alloc_private(this);
- if (ret)
- return ret;
- ret = crypt_init_xlator(this);
- if (ret)
- goto error;
- this->local_pool = mem_pool_new(crypt_local_t, 64);
- if (!this->local_pool) {
- gf_log(this->name, GF_LOG_ERROR,
- "failed to create local_t's memory pool");
- ret = ENOMEM;
- goto error;
- }
- gf_log("crypt", GF_LOG_INFO, "crypt xlator loaded");
- return 0;
-error:
- crypt_free_private(this);
- return ret;
-}
-
-void
-fini(xlator_t *this)
-{
- crypt_free_private(this);
-}
-
-struct xlator_fops fops = {.readv = crypt_readv,
- .writev = crypt_writev,
- .truncate = crypt_truncate,
- .ftruncate = crypt_ftruncate,
- .setxattr = crypt_setxattr,
- .fsetxattr = crypt_fsetxattr,
- .link = crypt_link,
- .unlink = crypt_unlink,
- .rename = crypt_rename,
- .open = crypt_open,
- .create = crypt_create,
- .stat = crypt_stat,
- .fstat = crypt_fstat,
- .lookup = crypt_lookup,
- .readdirp = crypt_readdirp,
- .access = crypt_access};
-
-struct xlator_cbks cbks = {.forget = crypt_forget};
-
-struct volume_options options[] = {
- {.key = {"master-key"},
- .type = GF_OPTION_TYPE_PATH,
- .description =
- "Pathname of regular file which contains master volume key"},
- {
- .key = {"data-key-size"},
- .type = GF_OPTION_TYPE_SIZET,
- .description = "Data key size (bits)",
- .min = 256,
- .max = 512,
- .default_value = "256",
- },
- {.key = {"block-size"},
- .type = GF_OPTION_TYPE_SIZET,
- .description = "Atom size (bits)",
- .min = 512,
- .max = 4096,
- .default_value = "4096"},
- {.key = {NULL}},
-};
-
-/*
- Local variables:
- c-indentation-style: "K&R"
- mode-name: "LC"
- c-basic-offset: 8
- tab-width: 8
- fill-column: 80
- scroll-step: 1
- End:
-*/
diff --git a/xlators/encryption/crypt/src/crypt.h b/xlators/encryption/crypt/src/crypt.h
deleted file mode 100644
index 390eee831b1..00000000000
--- a/xlators/encryption/crypt/src/crypt.h
+++ /dev/null
@@ -1,931 +0,0 @@
-/*
- Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-#ifndef __CRYPT_H__
-#define __CRYPT_H__
-
-#include <openssl/aes.h>
-#include <openssl/evp.h>
-#include <openssl/sha.h>
-#include <openssl/hmac.h>
-#include <openssl/cmac.h>
-#include <openssl/modes.h>
-#include "crypt-mem-types.h"
-#include "compat.h"
-
-#define CRYPT_XLATOR_ID (0)
-
-#define MAX_IOVEC_BITS (3)
-#define MAX_IOVEC (1 << MAX_IOVEC_BITS)
-#define KEY_FACTOR_BITS (6)
-
-#define DEBUG_CRYPT (0)
-#define TRIVIAL_TFM (0)
-
-#define CRYPT_MIN_BLOCK_BITS (9)
-#define CRYPT_MAX_BLOCK_BITS (12)
-
-#define MASTER_VOL_KEY_SIZE (32)
-#define NMTD_VOL_KEY_SIZE (16)
-
-#if !defined(GF_LINUX_HOST_OS)
-typedef off_t loff_t;
-#endif
-
-struct crypt_key {
- uint32_t len;
- const char *label;
-};
-
-/*
- * Add new key types to the end of this
- * enumeration but before LAST_KEY_TYPE
- */
-typedef enum {
- MASTER_VOL_KEY,
- NMTD_VOL_KEY,
- NMTD_LINK_KEY,
- EMTD_FILE_KEY,
- DATA_FILE_KEY_256,
- DATA_FILE_KEY_512,
- LAST_KEY_TYPE
-} crypt_key_type;
-
-struct kderive_context {
- const unsigned char *pkey; /* parent key */
- uint32_t pkey_len; /* parent key size, bits */
- uint32_t ckey_len; /* child key size, bits */
- unsigned char *fid; /* fixed input data, NIST 800-108, 5.1 */
- uint32_t fid_len; /* fid len, bytes */
- unsigned char *out; /* contains child keying material */
- uint32_t out_len; /* out len, bytes */
-};
-
-typedef enum { DATA_ATOM, HOLE_ATOM, LAST_DATA_TYPE } atom_data_type;
-
-typedef enum {
- HEAD_ATOM,
- TAIL_ATOM,
- FULL_ATOM,
- LAST_LOCALITY_TYPE
-} atom_locality_type;
-
-typedef enum {
- MTD_CREATE,
- MTD_APPEND,
- MTD_OVERWRITE,
- MTD_CUT,
- MTD_LAST_OP
-} mtd_op_t;
-
-struct xts128_context {
- void *key1, *key2;
- block128_f block1, block2;
-};
-
-struct object_cipher_info {
- cipher_alg_t o_alg;
- cipher_mode_t o_mode;
- uint32_t o_block_bits;
- uint32_t o_dkey_size; /* raw data key size in bits */
- union {
- struct {
- unsigned char ivec[16];
- AES_KEY dkey[2];
- AES_KEY tkey; /* key used for tweaking */
- XTS128_CONTEXT xts;
- } aes_xts;
- } u;
-};
-
-struct master_cipher_info {
- /*
- * attributes inherited by newly created regular files
- */
- cipher_alg_t m_alg;
- cipher_mode_t m_mode;
- uint32_t m_block_bits;
- uint32_t m_dkey_size; /* raw key size in bits */
- /*
- * master key
- */
- unsigned char m_key[MASTER_VOL_KEY_SIZE];
- /*
- * volume key for oid authentication
- */
- unsigned char m_nmtd_key[NMTD_VOL_KEY_SIZE];
-};
-
-/*
- * This info is not changed during file's life
- */
-struct crypt_inode_info {
-#if DEBUG_CRYPT
- loc_t *loc; /* pathname that the file has been
- opened, or created with */
-#endif
- uint16_t nr_minor;
- uuid_t oid;
- struct object_cipher_info cinfo;
-};
-
-/*
- * this should locate in secure memory
- */
-typedef struct {
- struct master_cipher_info master;
-} crypt_private_t;
-
-static inline struct master_cipher_info *
-get_master_cinfo(crypt_private_t *priv)
-{
- return &priv->master;
-}
-
-static inline struct object_cipher_info *
-get_object_cinfo(struct crypt_inode_info *info)
-{
- return &info->cinfo;
-}
-
-/*
- * this describes layouts and properties
- * of atoms in an aligned vector
- */
-struct avec_config {
- uint32_t atom_size;
- atom_data_type type;
- size_t orig_size;
- off_t orig_offset;
- size_t expanded_size;
- off_t aligned_offset;
-
- uint32_t off_in_head;
- uint32_t off_in_tail;
- uint32_t gap_in_tail;
- uint32_t nr_full_blocks;
-
- struct iovec *avec; /* aligned vector */
- uint32_t acount; /* number of avec components. The same
- * as number of occupied logical blocks */
- char **pool;
- uint32_t blocks_in_pool;
- uint32_t cursor; /* makes sense only for ordered writes,
- * so there is no races on this counter.
- *
- * Cursor is per-config object, we don't
- * reset cursor for atoms of different
- * localities (head, tail, full)
- */
-};
-
-typedef struct {
- glusterfs_fop_t fop; /* code of FOP this local info built for */
- fd_t *fd;
- inode_t *inode;
- loc_t *loc;
- int32_t mac_idx;
- loc_t *newloc;
- int32_t flags;
- int32_t wbflags;
- struct crypt_inode_info *info;
- struct iobref *iobref;
- struct iobref *iobref_data;
- off_t offset;
-
- uint64_t old_file_size; /* per FOP, retrieved under lock held */
- uint64_t cur_file_size; /* per iteration, before issuing IOs */
- uint64_t new_file_size; /* per iteration, after issuing IOs */
-
- uint64_t io_offset; /* offset of IOs issued per iteration */
- uint64_t io_offset_nopad; /* offset of user's data in the atom */
- uint32_t io_size; /* size of IOs issued per iteration */
- uint32_t io_size_nopad; /* size of user's data in the IOs */
- uint32_t eof_padding_size; /* size od EOF padding in the IOs */
-
- gf_lock_t call_lock; /* protect nr_calls from many cbks */
- int32_t nr_calls;
-
- atom_data_type active_setup; /* which setup (hole or date)
- is currently active */
- /* data setup */
- struct avec_config data_conf;
-
- /* hole setup */
- int hole_conv_in_proggress;
- gf_lock_t hole_lock; /* protect hole config from many cbks */
- int hole_handled;
- struct avec_config hole_conf;
- struct iatt buf;
- struct iatt prebuf;
- struct iatt postbuf;
- struct iatt *prenewparent;
- struct iatt *postnewparent;
- int32_t op_ret;
- int32_t op_errno;
- int32_t rw_count; /* total read or written */
- gf_lock_t rw_count_lock; /* protect the counter above */
- unsigned char *format; /* for create, update format string */
- uint32_t format_size;
- uint32_t msgflags; /* messages for crypt_open() */
- dict_t *xdata;
- dict_t *xattr;
- struct iovec vec; /* contains last file's atom for
- read-prune-write sequence */
- gf_boolean_t custom_mtd;
- /*
- * the next 3 fields are used by readdir and friends
- */
- gf_dirent_t *de; /* directory entry */
- char *de_path; /* pathname of directory entry */
- uint32_t de_prefix_len; /* length of the parent's pathname */
- gf_dirent_t *entries;
-
- uint32_t update_disk_file_size : 1;
-} crypt_local_t;
-
-/* This represents a (read)modify-write atom */
-struct rmw_atom {
- atom_locality_type locality;
- /*
- * read-modify-write sequence of the atom
- */
- int32_t (*rmw)(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iovec *vec,
- int32_t count, struct iatt *stbuf, struct iobref *iobref,
- dict_t *xdata);
- /*
- * offset of the logical block in a file
- */
- loff_t (*offset_at)(call_frame_t *frame, struct object_cipher_info *object);
- /*
- * IO offset in an atom
- */
- uint32_t (*offset_in)(call_frame_t *frame,
- struct object_cipher_info *object);
- /*
- * number of bytes of plain text of this atom that user
- * wants to read/write.
- * It can be smaller than atom_size in the case of head
- * or tail atoms.
- */
- uint32_t (*io_size_nopad)(call_frame_t *frame,
- struct object_cipher_info *object);
- /*
- * which iovec represents the atom
- */
- struct iovec *(*get_iovec)(call_frame_t *frame, uint32_t count);
- /*
- * how many bytes of partial block should be uptodated by
- * reading from disk.
- * This is used to perform a read component of RMW (read-modify-write).
- */
- uint32_t (*count_to_uptodate)(call_frame_t *frame,
- struct object_cipher_info *object);
- struct avec_config *(*get_config)(call_frame_t *frame);
-};
-
-struct data_cipher_alg {
- gf_boolean_t atomic; /* true means that algorithm requires
- to pad data before cipher transform */
- gf_boolean_t should_pad; /* true means that algorithm requires
- to pad the end of file with extra-data */
- uint32_t blkbits; /* blksize = 1 << blkbits */
- /*
- * any preliminary sanity checks goes here
- */
- int32_t (*init)(void);
- /*
- * set alg-mode specific inode info
- */
- int32_t (*set_private)(struct crypt_inode_info *info,
- struct master_cipher_info *master);
- /*
- * check alg-mode specific data key
- */
- int32_t (*check_key)(uint32_t key_size);
- void (*set_iv)(off_t offset, struct object_cipher_info *object);
- int32_t (*encrypt)(const unsigned char *from, unsigned char *to,
- size_t length, off_t offset, const int enc,
- struct object_cipher_info *object);
-};
-
-/*
- * version-dependent metadata loader
- */
-struct crypt_mtd_loader {
- /*
- * return core format size
- */
- size_t (*format_size)(mtd_op_t op, size_t old_size);
- /*
- * pack version-specific metadata of an object
- * at ->create()
- */
- int32_t (*create_format)(unsigned char *wire, loc_t *loc,
- struct crypt_inode_info *info,
- struct master_cipher_info *master);
- /*
- * extract version-specific metadata of an object
- * at ->open() time
- */
- int32_t (*open_format)(unsigned char *wire, int32_t len, loc_t *loc,
- struct crypt_inode_info *info,
- struct master_cipher_info *master,
- crypt_local_t *local, gf_boolean_t load_info);
- int32_t (*update_format)(unsigned char *new, unsigned char *old,
- size_t old_len, int32_t mac_idx, mtd_op_t op,
- loc_t *loc, struct crypt_inode_info *info,
- struct master_cipher_info *master,
- crypt_local_t *local);
-};
-
-typedef int32_t (*end_writeback_handler_t)(call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret,
- int32_t op_errno,
- struct iatt *prebuf,
- struct iatt *postbuf, dict_t *xdata);
-typedef void (*linkop_wind_handler_t)(call_frame_t *frame, xlator_t *this);
-typedef void (*linkop_unwind_handler_t)(call_frame_t *frame);
-
-/* Declarations */
-
-/* keys.c */
-extern struct crypt_key crypt_keys[LAST_KEY_TYPE];
-int32_t
-get_nmtd_vol_key(struct master_cipher_info *master);
-int32_t
-get_nmtd_link_key(loc_t *loc, struct master_cipher_info *master,
- unsigned char *result);
-int32_t
-get_emtd_file_key(struct crypt_inode_info *info,
- struct master_cipher_info *master, unsigned char *result);
-int32_t
-get_data_file_key(struct crypt_inode_info *info,
- struct master_cipher_info *master, uint32_t keysize,
- unsigned char *key);
-/* data.c */
-extern struct data_cipher_alg data_cipher_algs[LAST_CIPHER_ALG]
- [LAST_CIPHER_MODE];
-void
-encrypt_aligned_iov(struct object_cipher_info *object, struct iovec *vec,
- int count, off_t off);
-void
-decrypt_aligned_iov(struct object_cipher_info *object, struct iovec *vec,
- int count, off_t off);
-int32_t
-align_iov_by_atoms(xlator_t *this, crypt_local_t *local,
- struct object_cipher_info *object,
- struct iovec *vec /* input vector */,
- int32_t count /* number of vec components */,
- struct iovec *avec /* aligned vector */,
- char **blocks /* pool of blocks */,
- uint32_t *blocks_allocated, struct avec_config *conf);
-int32_t
-set_config_avec_data(xlator_t *this, crypt_local_t *local,
- struct avec_config *conf,
- struct object_cipher_info *object, struct iovec *vec,
- int32_t vec_count);
-int32_t
-set_config_avec_hole(xlator_t *this, crypt_local_t *local,
- struct avec_config *conf,
- struct object_cipher_info *object, glusterfs_fop_t fop);
-void
-set_gap_at_end(call_frame_t *frame, struct object_cipher_info *object,
- struct avec_config *conf, atom_data_type dtype);
-void
-set_config_offsets(call_frame_t *frame, xlator_t *this, uint64_t offset,
- uint64_t count, atom_data_type dtype,
- int32_t setup_gap_in_tail);
-
-/* metadata.c */
-extern struct crypt_mtd_loader mtd_loaders[LAST_MTD_LOADER];
-
-int32_t
-alloc_format(crypt_local_t *local, size_t size);
-int32_t
-alloc_format_create(crypt_local_t *local);
-void
-free_format(crypt_local_t *local);
-size_t
-format_size(mtd_op_t op, size_t old_size);
-size_t
-new_format_size(void);
-int32_t
-open_format(unsigned char *str, int32_t len, loc_t *loc,
- struct crypt_inode_info *info, struct master_cipher_info *master,
- crypt_local_t *local, gf_boolean_t load_info);
-int32_t
-update_format(unsigned char *new, unsigned char *old, size_t old_len,
- int32_t mac_idx, mtd_op_t op, loc_t *loc,
- struct crypt_inode_info *info, struct master_cipher_info *master,
- crypt_local_t *local);
-int32_t
-create_format(unsigned char *wire, loc_t *loc, struct crypt_inode_info *info,
- struct master_cipher_info *master);
-
-/* atom.c */
-struct rmw_atom *
-atom_by_types(atom_data_type data, atom_locality_type locality);
-void
-submit_partial(call_frame_t *frame, xlator_t *this, fd_t *fd,
- atom_locality_type ltype);
-void
-submit_full(call_frame_t *frame, xlator_t *this);
-
-/* crypt.c */
-
-end_writeback_handler_t
-dispatch_end_writeback(glusterfs_fop_t fop);
-void
-set_local_io_params_writev(call_frame_t *frame,
- struct object_cipher_info *object,
- struct rmw_atom *atom, off_t io_offset,
- uint32_t io_size);
-void
-link_wind(call_frame_t *frame, xlator_t *this);
-void
-unlink_wind(call_frame_t *frame, xlator_t *this);
-void
-link_unwind(call_frame_t *frame);
-void
-unlink_unwind(call_frame_t *frame);
-void
-rename_wind(call_frame_t *frame, xlator_t *this);
-void
-rename_unwind(call_frame_t *frame);
-
-/* Inline functions */
-
-static inline int32_t
-crypt_xlator_id(void)
-{
- return CRYPT_XLATOR_ID;
-}
-
-static inline mtd_loader_id
-current_mtd_loader(void)
-{
- return MTD_LOADER_V1;
-}
-
-static inline uint32_t
-master_key_size(void)
-{
- return crypt_keys[MASTER_VOL_KEY].len >> 3;
-}
-
-static inline uint32_t
-nmtd_vol_key_size(void)
-{
- return crypt_keys[NMTD_VOL_KEY].len >> 3;
-}
-
-static inline uint32_t
-alg_mode_blkbits(cipher_alg_t alg, cipher_mode_t mode)
-{
- return data_cipher_algs[alg][mode].blkbits;
-}
-
-static inline uint32_t
-alg_mode_blksize(cipher_alg_t alg, cipher_mode_t mode)
-{
- return 1 << alg_mode_blkbits(alg, mode);
-}
-
-static inline gf_boolean_t
-alg_mode_atomic(cipher_alg_t alg, cipher_mode_t mode)
-{
- return data_cipher_algs[alg][mode].atomic;
-}
-
-static inline gf_boolean_t
-alg_mode_should_pad(cipher_alg_t alg, cipher_mode_t mode)
-{
- return data_cipher_algs[alg][mode].should_pad;
-}
-
-static inline uint32_t
-master_alg_blksize(struct master_cipher_info *mr)
-{
- return alg_mode_blksize(mr->m_alg, mr->m_mode);
-}
-
-static inline uint32_t
-master_alg_blkbits(struct master_cipher_info *mr)
-{
- return alg_mode_blkbits(mr->m_alg, mr->m_mode);
-}
-
-static inline gf_boolean_t
-master_alg_atomic(struct master_cipher_info *mr)
-{
- return alg_mode_atomic(mr->m_alg, mr->m_mode);
-}
-
-static inline gf_boolean_t
-master_alg_should_pad(struct master_cipher_info *mr)
-{
- return alg_mode_should_pad(mr->m_alg, mr->m_mode);
-}
-
-static inline uint32_t
-object_alg_blksize(struct object_cipher_info *ob)
-{
- return alg_mode_blksize(ob->o_alg, ob->o_mode);
-}
-
-static inline uint32_t
-object_alg_blkbits(struct object_cipher_info *ob)
-{
- return alg_mode_blkbits(ob->o_alg, ob->o_mode);
-}
-
-static inline gf_boolean_t
-object_alg_atomic(struct object_cipher_info *ob)
-{
- return alg_mode_atomic(ob->o_alg, ob->o_mode);
-}
-
-static inline gf_boolean_t
-object_alg_should_pad(struct object_cipher_info *ob)
-{
- return alg_mode_should_pad(ob->o_alg, ob->o_mode);
-}
-
-static inline uint32_t
-aes_raw_key_size(struct master_cipher_info *master)
-{
- return master->m_dkey_size >> 3;
-}
-
-static inline struct avec_config *
-get_hole_conf(call_frame_t *frame)
-{
- return &(((crypt_local_t *)frame->local)->hole_conf);
-}
-
-static inline struct avec_config *
-get_data_conf(call_frame_t *frame)
-{
- return &(((crypt_local_t *)frame->local)->data_conf);
-}
-
-static inline int32_t
-get_atom_bits(struct object_cipher_info *object)
-{
- return object->o_block_bits;
-}
-
-static inline int32_t
-get_atom_size(struct object_cipher_info *object)
-{
- return 1 << get_atom_bits(object);
-}
-
-static inline int32_t
-has_head_block(struct avec_config *conf)
-{
- return conf->off_in_head || (conf->acount == 1 && conf->off_in_tail);
-}
-
-static inline int32_t
-has_tail_block(struct avec_config *conf)
-{
- return conf->off_in_tail && conf->acount > 1;
-}
-
-static inline int32_t
-has_full_blocks(struct avec_config *conf)
-{
- return conf->nr_full_blocks;
-}
-
-static inline int32_t
-should_submit_head_block(struct avec_config *conf)
-{
- return has_head_block(conf) && (conf->cursor == 0);
-}
-
-static inline int32_t
-should_submit_tail_block(struct avec_config *conf)
-{
- return has_tail_block(conf) && (conf->cursor == conf->acount - 1);
-}
-
-static inline int32_t
-should_submit_full_block(struct avec_config *conf)
-{
- uint32_t start = has_head_block(conf) ? 1 : 0;
-
- return has_full_blocks(conf) && conf->cursor >= start &&
- conf->cursor < start + conf->nr_full_blocks;
-}
-
-#if DEBUG_CRYPT
-static inline void
-crypt_check_input_len(size_t len, struct object_cipher_info *object)
-{
- if (object_alg_should_pad(object) &&
- (len & (object_alg_blksize(object) - 1)))
- gf_log("crypt", GF_LOG_DEBUG, "bad input len: %d", (int)len);
-}
-
-static inline void
-check_head_block(struct avec_config *conf)
-{
- if (!has_head_block(conf))
- gf_log("crypt", GF_LOG_DEBUG, "not a head atom");
-}
-
-static inline void
-check_tail_block(struct avec_config *conf)
-{
- if (!has_tail_block(conf))
- gf_log("crypt", GF_LOG_DEBUG, "not a tail atom");
-}
-
-static inline void
-check_full_block(struct avec_config *conf)
-{
- if (!has_full_blocks(conf))
- gf_log("crypt", GF_LOG_DEBUG, "not a full atom");
-}
-
-static inline void
-check_cursor_head(struct avec_config *conf)
-{
- if (!has_head_block(conf))
- gf_log("crypt", GF_LOG_DEBUG, "Illegal call of head atom method");
- else if (conf->cursor != 0)
- gf_log("crypt", GF_LOG_DEBUG, "Cursor (%d) is not at head atom",
- conf->cursor);
-}
-
-static inline void
-check_cursor_full(struct avec_config *conf)
-{
- if (!has_full_blocks(conf))
- gf_log("crypt", GF_LOG_DEBUG, "Illegal call of full atom method");
- if (has_head_block(conf) && (conf->cursor == 0))
- gf_log("crypt", GF_LOG_DEBUG, "Cursor is not at full atom");
-}
-
-/*
- * FIXME: use avec->iov_len to check setup
- */
-static inline int
-data_local_invariant(crypt_local_t *local)
-{
- return 0;
-}
-
-#else
-#define crypt_check_input_len(len, object) noop
-#define check_head_block(conf) noop
-#define check_tail_block(conf) noop
-#define check_full_block(conf) noop
-#define check_cursor_head(conf) noop
-#define check_cursor_full(conf) noop
-
-#endif /* DEBUG_CRYPT */
-
-static inline struct avec_config *
-conf_by_type(call_frame_t *frame, atom_data_type dtype)
-{
- struct avec_config *conf = NULL;
-
- switch (dtype) {
- case HOLE_ATOM:
- conf = get_hole_conf(frame);
- break;
- case DATA_ATOM:
- conf = get_data_conf(frame);
- break;
- default:
- gf_log("crypt", GF_LOG_DEBUG, "bad atom type");
- }
- return conf;
-}
-
-static inline uint32_t
-nr_calls_head(struct avec_config *conf)
-{
- return has_head_block(conf) ? 1 : 0;
-}
-
-static inline uint32_t
-nr_calls_tail(struct avec_config *conf)
-{
- return has_tail_block(conf) ? 1 : 0;
-}
-
-static inline uint32_t
-nr_calls_full(struct avec_config *conf)
-{
- switch (conf->type) {
- case HOLE_ATOM:
- return has_full_blocks(conf);
- case DATA_ATOM:
- return has_full_blocks(conf)
- ? logical_blocks_occupied(0, conf->nr_full_blocks,
- MAX_IOVEC_BITS)
- : 0;
- default:
- gf_log("crypt", GF_LOG_DEBUG, "bad atom data type");
- return 0;
- }
-}
-
-static inline uint32_t
-nr_calls(struct avec_config *conf)
-{
- return nr_calls_head(conf) + nr_calls_tail(conf) + nr_calls_full(conf);
-}
-
-static inline uint32_t
-nr_calls_data(call_frame_t *frame)
-{
- return nr_calls(get_data_conf(frame));
-}
-
-static inline uint32_t
-nr_calls_hole(call_frame_t *frame)
-{
- return nr_calls(get_hole_conf(frame));
-}
-
-static inline void
-get_one_call_nolock(call_frame_t *frame)
-{
- crypt_local_t *local = frame->local;
-
- ++local->nr_calls;
-
- // gf_log("crypt", GF_LOG_DEBUG, "get %d calls", 1);
-}
-
-static inline void
-get_one_call(call_frame_t *frame)
-{
- crypt_local_t *local = frame->local;
-
- LOCK(&local->call_lock);
- get_one_call_nolock(frame);
- UNLOCK(&local->call_lock);
-}
-
-static inline void
-get_nr_calls_nolock(call_frame_t *frame, int32_t nr)
-{
- crypt_local_t *local = frame->local;
-
- local->nr_calls += nr;
-
- // gf_log("crypt", GF_LOG_DEBUG, "get %d calls", nr);
-}
-
-static inline void
-get_nr_calls(call_frame_t *frame, int32_t nr)
-{
- crypt_local_t *local = frame->local;
-
- LOCK(&local->call_lock);
- get_nr_calls_nolock(frame, nr);
- UNLOCK(&local->call_lock);
-}
-
-static inline int
-put_one_call(crypt_local_t *local)
-{
- uint32_t last = 0;
-
- LOCK(&local->call_lock);
- if (--local->nr_calls == 0)
- last = 1;
-
- // gf_log("crypt", GF_LOG_DEBUG, "put %d calls", 1);
-
- UNLOCK(&local->call_lock);
- return last;
-}
-
-static inline int
-is_appended_write(call_frame_t *frame)
-{
- crypt_local_t *local = frame->local;
- struct avec_config *conf = get_data_conf(frame);
-
- return conf->orig_offset + conf->orig_size > local->old_file_size;
-}
-
-static inline int
-is_ordered_mode(call_frame_t *frame)
-{
-#if 0
- crypt_local_t *local = frame->local;
- return local->fop == GF_FOP_FTRUNCATE ||
- (local->fop == GF_FOP_WRITE && is_appended_write(frame));
-#endif
- return 1;
-}
-
-static inline int32_t
-hole_conv_completed(crypt_local_t *local)
-{
- struct avec_config *conf = &local->hole_conf;
- return conf->cursor == conf->acount;
-}
-
-static inline int32_t
-data_write_in_progress(crypt_local_t *local)
-{
- return local->active_setup == DATA_ATOM;
-}
-
-static inline int32_t
-parent_is_crypt_xlator(call_frame_t *frame, xlator_t *this)
-{
- return frame->parent->this == this;
-}
-
-static inline linkop_wind_handler_t
-linkop_wind_dispatch(glusterfs_fop_t fop)
-{
- switch (fop) {
- case GF_FOP_LINK:
- return link_wind;
- case GF_FOP_UNLINK:
- return unlink_wind;
- case GF_FOP_RENAME:
- return rename_wind;
- default:
- gf_log("crypt", GF_LOG_ERROR, "Bad link operation %d", fop);
- return NULL;
- }
-}
-
-static inline linkop_unwind_handler_t
-linkop_unwind_dispatch(glusterfs_fop_t fop)
-{
- switch (fop) {
- case GF_FOP_LINK:
- return link_unwind;
- case GF_FOP_UNLINK:
- return unlink_unwind;
- case GF_FOP_RENAME:
- return rename_unwind;
- default:
- gf_log("crypt", GF_LOG_ERROR, "Bad link operation %d", fop);
- return NULL;
- }
-}
-
-static inline mtd_op_t
-linkop_mtdop_dispatch(glusterfs_fop_t fop)
-{
- switch (fop) {
- case GF_FOP_LINK:
- return MTD_APPEND;
- case GF_FOP_UNLINK:
- return MTD_CUT;
- case GF_FOP_RENAME:
- return MTD_OVERWRITE;
- default:
- gf_log("crypt", GF_LOG_WARNING, "Bad link operation %d", fop);
- return MTD_LAST_OP;
- }
-}
-
-#define CRYPT_STACK_UNWIND(fop, frame, params...) \
- do { \
- crypt_local_t *__local = NULL; \
- if (frame) { \
- __local = frame->local; \
- frame->local = NULL; \
- } \
- STACK_UNWIND_STRICT(fop, frame, params); \
- if (__local) { \
- GF_FREE(__local); \
- } \
- } while (0)
-
-#endif /* __CRYPT_H__ */
-
-/*
- Local variables:
- c-indentation-style: "K&R"
- mode-name: "LC"
- c-basic-offset: 8
- tab-width: 8
- fill-column: 80
- scroll-step: 1
- End:
-*/
diff --git a/xlators/encryption/crypt/src/data.c b/xlators/encryption/crypt/src/data.c
deleted file mode 100644
index 8e8701b6bf2..00000000000
--- a/xlators/encryption/crypt/src/data.c
+++ /dev/null
@@ -1,715 +0,0 @@
-/*
- Copyright (c) 2008-2013 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-#include "defaults.h"
-#include "crypt-common.h"
-#include "crypt.h"
-
-static void
-set_iv_aes_xts(off_t offset, struct object_cipher_info *object)
-{
- unsigned char *ivec;
-
- ivec = object->u.aes_xts.ivec;
-
- /* convert the tweak into a little-endian byte
- * array (IEEE P1619/D16, May 2007, section 5.1)
- */
-
- *((uint64_t *)ivec) = htole64(offset);
-
- /* ivec is padded with zeroes */
-}
-
-static int32_t
-aes_set_keys_common(unsigned char *raw_key, uint32_t key_size, AES_KEY *keys)
-{
- int32_t ret;
-
- ret = AES_set_encrypt_key(raw_key, key_size, &keys[AES_ENCRYPT]);
- if (ret) {
- gf_log("crypt", GF_LOG_ERROR, "Set encrypt key failed");
- return ret;
- }
- ret = AES_set_decrypt_key(raw_key, key_size, &keys[AES_DECRYPT]);
- if (ret) {
- gf_log("crypt", GF_LOG_ERROR, "Set decrypt key failed");
- return ret;
- }
- return 0;
-}
-
-/*
- * set private cipher info for xts mode
- */
-static int32_t
-set_private_aes_xts(struct crypt_inode_info *info,
- struct master_cipher_info *master)
-{
- int ret;
- struct object_cipher_info *object = get_object_cinfo(info);
- unsigned char *data_key;
- uint32_t subkey_size;
-
- /* init tweak value */
- memset(object->u.aes_xts.ivec, 0, 16);
-
- data_key = GF_CALLOC(1, object->o_dkey_size, gf_crypt_mt_key);
- if (!data_key)
- return ENOMEM;
-
- /*
- * retrieve data keying material
- */
- ret = get_data_file_key(info, master, object->o_dkey_size, data_key);
- if (ret) {
- gf_log("crypt", GF_LOG_ERROR, "Failed to retrieve data key");
- GF_FREE(data_key);
- return ret;
- }
- /*
- * parse compound xts key
- */
- subkey_size = object->o_dkey_size >> 4; /* (xts-key-size-in-bytes / 2) */
- /*
- * install key for data encryption
- */
- ret = aes_set_keys_common(data_key, subkey_size << 3,
- object->u.aes_xts.dkey);
- if (ret) {
- GF_FREE(data_key);
- return ret;
- }
- /*
- * set up key used to encrypt tweaks
- */
- ret = AES_set_encrypt_key(data_key + subkey_size, object->o_dkey_size / 2,
- &object->u.aes_xts.tkey);
- if (ret < 0)
- gf_log("crypt", GF_LOG_ERROR, "Set tweak key failed");
-
- GF_FREE(data_key);
- return ret;
-}
-
-static int32_t
-aes_xts_init(void)
-{
- cassert(AES_BLOCK_SIZE == (1 << AES_BLOCK_BITS));
- return 0;
-}
-
-static int32_t
-check_key_aes_xts(uint32_t keysize)
-{
- switch (keysize) {
- case 256:
- case 512:
- return 0;
- default:
- break;
- }
- return -1;
-}
-
-static int32_t
-encrypt_aes_xts(const unsigned char *from, unsigned char *to, size_t length,
- off_t offset, const int enc, struct object_cipher_info *object)
-{
- XTS128_CONTEXT ctx;
- if (enc) {
- ctx.key1 = &object->u.aes_xts.dkey[AES_ENCRYPT];
- ctx.block1 = (block128_f)AES_encrypt;
- } else {
- ctx.key1 = &object->u.aes_xts.dkey[AES_DECRYPT];
- ctx.block1 = (block128_f)AES_decrypt;
- }
- ctx.key2 = &object->u.aes_xts.tkey;
- ctx.block2 = (block128_f)AES_encrypt;
-
- return CRYPTO_xts128_encrypt(&ctx, object->u.aes_xts.ivec, from, to, length,
- enc);
-}
-
-/*
- * Cipher input chunk @from of length @len;
- * @to: result of cipher transform;
- * @off: offset in a file (must be cblock-aligned);
- */
-static void
-cipher_data(struct object_cipher_info *object, char *from, char *to, off_t off,
- size_t len, const int enc)
-{
- crypt_check_input_len(len, object);
-
-#if TRIVIAL_TFM && DEBUG_CRYPT
- return;
-#endif
- data_cipher_algs[object->o_alg][object->o_mode].set_iv(off, object);
- data_cipher_algs[object->o_alg][object->o_mode].encrypt(
- (const unsigned char *)from, (unsigned char *)to, len, off, enc,
- object);
-}
-
-#define MAX_CIPHER_CHUNK (1 << 30)
-
-/*
- * Do cipher (encryption/decryption) transform of a
- * continuous region of memory.
- *
- * @len: a number of bytes to transform;
- * @buf: data to transform;
- * @off: offset in a file, should be block-aligned
- * for atomic cipher modes and ksize-aligned
- * for other modes).
- * @dir: direction of transform (encrypt/decrypt).
- */
-static void
-cipher_region(struct object_cipher_info *object, char *from, char *to,
- off_t off, size_t len, int dir)
-{
- while (len > 0) {
- size_t to_cipher;
-
- to_cipher = len;
- if (to_cipher > MAX_CIPHER_CHUNK)
- to_cipher = MAX_CIPHER_CHUNK;
-
- /* this will reset IV */
- cipher_data(object, from, to, off, to_cipher, dir);
- from += to_cipher;
- to += to_cipher;
- off += to_cipher;
- len -= to_cipher;
- }
-}
-
-/*
- * Do cipher transform (encryption/decryption) of
- * plaintext/ciphertext represented by @vec.
- *
- * Pre-conditions: @vec represents a continuous piece
- * of data in a file at offset @off to be ciphered
- * (encrypted/decrypted).
- * @count is the number of vec's components. All the
- * components must be block-aligned, the caller is
- * responsible for this. @dir is "direction" of
- * transform (encrypt/decrypt).
- */
-static void
-cipher_aligned_iov(struct object_cipher_info *object, struct iovec *vec,
- int count, off_t off, int32_t dir)
-{
- int i;
- int len = 0;
-
- for (i = 0; i < count; i++) {
- cipher_region(object, vec[i].iov_base, vec[i].iov_base, off + len,
- vec[i].iov_len, dir);
- len += vec[i].iov_len;
- }
-}
-
-void
-encrypt_aligned_iov(struct object_cipher_info *object, struct iovec *vec,
- int count, off_t off)
-{
- cipher_aligned_iov(object, vec, count, off, 1);
-}
-
-void
-decrypt_aligned_iov(struct object_cipher_info *object, struct iovec *vec,
- int count, off_t off)
-{
- cipher_aligned_iov(object, vec, count, off, 0);
-}
-
-#if DEBUG_CRYPT
-static void
-compound_stream(struct iovec *vec, int count, char *buf, off_t skip)
-{
- int i;
- int off = 0;
- for (i = 0; i < count; i++) {
- memcpy(buf + off, vec[i].iov_base + skip, vec[i].iov_len - skip);
-
- off += (vec[i].iov_len - skip);
- skip = 0;
- }
-}
-
-static void
-check_iovecs(struct iovec *vec, int cnt, struct iovec *avec, int acnt,
- uint32_t off_in_head)
-{
- char *s1, *s2;
- uint32_t size, asize;
-
- size = iov_length(vec, cnt);
- asize = iov_length(avec, acnt) - off_in_head;
- if (size != asize) {
- gf_log("crypt", GF_LOG_DEBUG, "size %d is not eq asize %d", size,
- asize);
- return;
- }
- s1 = GF_CALLOC(1, size, gf_crypt_mt_data);
- if (!s1) {
- gf_log("crypt", GF_LOG_DEBUG, "Can not allocate stream ");
- return;
- }
- s2 = GF_CALLOC(1, asize, gf_crypt_mt_data);
- if (!s2) {
- GF_FREE(s1);
- gf_log("crypt", GF_LOG_DEBUG, "Can not allocate stream ");
- return;
- }
- compound_stream(vec, cnt, s1, 0);
- compound_stream(avec, acnt, s2, off_in_head);
- if (memcmp(s1, s2, size))
- gf_log("crypt", GF_LOG_DEBUG, "chunks of different data");
- GF_FREE(s1);
- GF_FREE(s2);
-}
-
-#else
-#define check_iovecs(vec, count, avec, avecn, off) noop
-#endif /* DEBUG_CRYPT */
-
-static char *
-data_alloc_block(xlator_t *this, crypt_local_t *local, int32_t block_size)
-{
- struct iobuf *iobuf = NULL;
-
- iobuf = iobuf_get2(this->ctx->iobuf_pool, block_size);
- if (!iobuf) {
- gf_log("crypt", GF_LOG_ERROR, "Failed to get iobuf");
- return NULL;
- }
- if (!local->iobref_data) {
- local->iobref_data = iobref_new();
- if (!local->iobref_data) {
- gf_log("crypt", GF_LOG_ERROR, "Failed to get iobref");
- iobuf_unref(iobuf);
- return NULL;
- }
- }
- iobref_add(local->iobref_data, iobuf);
- return iobuf->ptr;
-}
-
-/*
- * Compound @avec, which represent the same data
- * chunk as @vec, but has aligned components of
- * specified block size. Alloc blocks, if needed.
- * In particular, incomplete head and tail blocks
- * must be allocated.
- * Put number of allocated blocks to @num_blocks.
- *
- * Example:
- *
- * input: data chunk represented by 4 components
- * [AB],[BC],[CD],[DE];
- * output: 5 logical blocks (0, 1, 2, 3, 4).
- *
- * A B C D E
- * *-----*+------*-+---*----+--------+-*
- * | || | | | | | |
- * *-+-----+*------+-*---+----*--------*-+------*
- * 0 1 2 3 4
- *
- * 0 - incomplete compound (head);
- * 1, 2 - full compound;
- * 3 - full non-compound (the case of reuse);
- * 4 - incomplete non-compound (tail).
- */
-int32_t
-align_iov_by_atoms(xlator_t *this, crypt_local_t *local,
- struct object_cipher_info *object,
- struct iovec *vec /* input vector */,
- int32_t count /* number of vec components */,
- struct iovec *avec /* aligned vector */,
- char **blocks /* pool of blocks */,
- uint32_t *blocks_allocated, struct avec_config *conf)
-{
- int vecn = 0; /* number of the current component in vec */
- int avecn = 0; /* number of the current component in avec */
- off_t vec_off = 0; /* offset in the current vec component,
- * i.e. the number of bytes have already
- * been copied */
- int32_t block_size = get_atom_size(object);
- size_t to_process; /* number of vec's bytes to copy and(or) re-use */
- int32_t off_in_head = conf->off_in_head;
-
- to_process = iov_length(vec, count);
-
- while (to_process > 0) {
- if (off_in_head || vec[vecn].iov_len - vec_off < block_size) {
- /*
- * less than block_size:
- * the case of incomplete (head or tail),
- * or compound block
- */
- size_t copied = 0;
- /*
- * populate the pool with a new block
- */
- blocks[*blocks_allocated] = data_alloc_block(this, local,
- block_size);
- if (!blocks[*blocks_allocated])
- return -ENOMEM;
- memset(blocks[*blocks_allocated], 0, off_in_head);
- /*
- * fill the block with vec components
- */
- do {
- size_t to_copy;
-
- to_copy = vec[vecn].iov_len - vec_off;
- if (to_copy > block_size - off_in_head)
- to_copy = block_size - off_in_head;
-
- memcpy(blocks[*blocks_allocated] + off_in_head + copied,
- vec[vecn].iov_base + vec_off, to_copy);
-
- copied += to_copy;
- to_process -= to_copy;
-
- vec_off += to_copy;
- if (vec_off == vec[vecn].iov_len) {
- /* finished with this vecn */
- vec_off = 0;
- vecn++;
- }
- } while (copied < (block_size - off_in_head) && to_process > 0);
- /*
- * update avec
- */
- avec[avecn].iov_len = off_in_head + copied;
- avec[avecn].iov_base = blocks[*blocks_allocated];
-
- (*blocks_allocated)++;
- off_in_head = 0;
- } else {
- /*
- * the rest of the current vec component
- * is not less than block_size, so reuse
- * the memory buffer of the component.
- */
- size_t to_reuse;
- to_reuse = (to_process > block_size ? block_size : to_process);
- avec[avecn].iov_len = to_reuse;
- avec[avecn].iov_base = vec[vecn].iov_base + vec_off;
-
- vec_off += to_reuse;
- if (vec_off == vec[vecn].iov_len) {
- /* finished with this vecn */
- vec_off = 0;
- vecn++;
- }
- to_process -= to_reuse;
- }
- avecn++;
- }
- check_iovecs(vec, count, avec, avecn, conf->off_in_head);
- return 0;
-}
-
-/*
- * allocate and setup aligned vector for data submission
- * Pre-condition: @conf is set.
- */
-int32_t
-set_config_avec_data(xlator_t *this, crypt_local_t *local,
- struct avec_config *conf,
- struct object_cipher_info *object, struct iovec *vec,
- int32_t vec_count)
-{
- int32_t ret = ENOMEM;
- struct iovec *avec;
- char **pool;
- uint32_t blocks_in_pool = 0;
-
- conf->type = DATA_ATOM;
-
- avec = GF_CALLOC(conf->acount, sizeof(*avec), gf_crypt_mt_iovec);
- if (!avec)
- return ret;
- pool = GF_CALLOC(conf->acount, sizeof(*pool), gf_crypt_mt_char);
- if (!pool) {
- GF_FREE(avec);
- return ret;
- }
- if (!vec) {
- /*
- * degenerated case: no data
- */
- pool[0] = data_alloc_block(this, local, get_atom_size(object));
- if (!pool[0])
- goto free;
- blocks_in_pool = 1;
- avec->iov_base = pool[0];
- avec->iov_len = conf->off_in_tail;
- } else {
- ret = align_iov_by_atoms(this, local, object, vec, vec_count, avec,
- pool, &blocks_in_pool, conf);
- if (ret)
- goto free;
- }
- conf->avec = avec;
- conf->pool = pool;
- conf->blocks_in_pool = blocks_in_pool;
- return 0;
-free:
- GF_FREE(avec);
- GF_FREE(pool);
- return ret;
-}
-
-/*
- * allocate and setup aligned vector for hole submission
- */
-int32_t
-set_config_avec_hole(xlator_t *this, crypt_local_t *local,
- struct avec_config *conf,
- struct object_cipher_info *object, glusterfs_fop_t fop)
-{
- uint32_t i, idx;
- struct iovec *avec;
- char **pool;
- uint32_t num_blocks;
- uint32_t blocks_in_pool = 0;
-
- conf->type = HOLE_ATOM;
-
- num_blocks = conf->acount -
- (conf->nr_full_blocks ? conf->nr_full_blocks - 1 : 0);
-
- switch (fop) {
- case GF_FOP_WRITE:
- /*
- * hole goes before data
- */
- if (num_blocks == 1 && conf->off_in_tail != 0)
- /*
- * we won't submit a hole which fits into
- * a data atom: this part of hole will be
- * submitted with data write
- */
- return 0;
- break;
- case GF_FOP_FTRUNCATE:
- /*
- * expanding truncate, hole goes after data,
- * and will be submitted in any case.
- */
- break;
- default:
- gf_log("crypt", GF_LOG_WARNING, "bad file operation %d", fop);
- return 0;
- }
- avec = GF_CALLOC(num_blocks, sizeof(*avec), gf_crypt_mt_iovec);
- if (!avec)
- return ENOMEM;
- pool = GF_CALLOC(num_blocks, sizeof(*pool), gf_crypt_mt_char);
- if (!pool) {
- GF_FREE(avec);
- return ENOMEM;
- }
- for (i = 0; i < num_blocks; i++) {
- pool[i] = data_alloc_block(this, local, get_atom_size(object));
- if (pool[i] == NULL)
- goto free;
- blocks_in_pool++;
- }
- if (has_head_block(conf)) {
- /* set head block */
- idx = 0;
- avec[idx].iov_base = pool[idx];
- avec[idx].iov_len = get_atom_size(object);
- memset(avec[idx].iov_base + conf->off_in_head, 0,
- get_atom_size(object) - conf->off_in_head);
- }
- if (has_tail_block(conf)) {
- /* set tail block */
- idx = num_blocks - 1;
- avec[idx].iov_base = pool[idx];
- avec[idx].iov_len = get_atom_size(object);
- memset(avec[idx].iov_base, 0, conf->off_in_tail);
- }
- if (has_full_blocks(conf)) {
- /* set full block */
- idx = conf->off_in_head ? 1 : 0;
- avec[idx].iov_base = pool[idx];
- avec[idx].iov_len = get_atom_size(object);
- /*
- * since we re-use the buffer,
- * zeroes will be set every time
- * before encryption, see submit_full()
- */
- }
- conf->avec = avec;
- conf->pool = pool;
- conf->blocks_in_pool = blocks_in_pool;
- return 0;
-free:
- GF_FREE(avec);
- GF_FREE(pool);
- return ENOMEM;
-}
-
-/* A helper for setting up config of partial atoms (which
- * participate in read-modify-write sequence).
- *
- * Calculate and setup precise amount of "extra-bytes"
- * that should be uptodated at the end of partial (not
- * necessarily tail!) block.
- *
- * Pre-condition: local->old_file_size is valid!
- * @conf contains setup, which is enough for correct calculation
- * of has_tail_block(), ->get_offset().
- */
-void
-set_gap_at_end(call_frame_t *frame, struct object_cipher_info *object,
- struct avec_config *conf, atom_data_type dtype)
-{
- uint32_t to_block;
- crypt_local_t *local = frame->local;
- uint64_t old_file_size = local->old_file_size;
- struct rmw_atom *partial = atom_by_types(
- dtype, has_tail_block(conf) ? TAIL_ATOM : HEAD_ATOM);
-
- if (old_file_size <= partial->offset_at(frame, object))
- to_block = 0;
- else {
- to_block = old_file_size - partial->offset_at(frame, object);
- if (to_block > get_atom_size(object))
- to_block = get_atom_size(object);
- }
- if (to_block > conf->off_in_tail)
- conf->gap_in_tail = to_block - conf->off_in_tail;
- else
- /*
- * nothing to uptodate
- */
- conf->gap_in_tail = 0;
-}
-
-/*
- * fill struct avec_config with offsets layouts
- */
-void
-set_config_offsets(call_frame_t *frame, xlator_t *this, uint64_t offset,
- uint64_t count, atom_data_type dtype, int32_t set_gap)
-{
- crypt_local_t *local;
- struct object_cipher_info *object;
- struct avec_config *conf;
- uint32_t resid;
-
- uint32_t atom_size;
- uint32_t atom_bits;
-
- size_t orig_size;
- off_t orig_offset;
- size_t expanded_size;
- off_t aligned_offset;
-
- uint32_t off_in_head = 0;
- uint32_t off_in_tail = 0;
- uint32_t nr_full_blocks;
- int32_t size_full_blocks;
-
- uint32_t acount; /* number of aligned components to write.
- * The same as number of occupied logical
- * blocks (atoms)
- */
- local = frame->local;
- object = &local->info->cinfo;
- conf = (dtype == DATA_ATOM ? get_data_conf(frame) : get_hole_conf(frame));
-
- orig_offset = offset;
- orig_size = count;
-
- atom_size = get_atom_size(object);
- atom_bits = get_atom_bits(object);
-
- /*
- * Round-down the start,
- * round-up the end.
- */
- resid = offset & (uint64_t)(atom_size - 1);
-
- if (resid)
- off_in_head = resid;
- aligned_offset = offset - off_in_head;
- expanded_size = orig_size + off_in_head;
-
- /* calculate tail,
- expand size forward */
- resid = (offset + orig_size) & (uint64_t)(atom_size - 1);
-
- if (resid) {
- off_in_tail = resid;
- expanded_size += (atom_size - off_in_tail);
- }
- /*
- * calculate number of occupied blocks
- */
- acount = expanded_size >> atom_bits;
- /*
- * calculate number of full blocks
- */
- size_full_blocks = expanded_size;
- if (off_in_head)
- size_full_blocks -= atom_size;
- if (off_in_tail && size_full_blocks > 0)
- size_full_blocks -= atom_size;
- nr_full_blocks = size_full_blocks >> atom_bits;
-
- conf->atom_size = atom_size;
- conf->orig_size = orig_size;
- conf->orig_offset = orig_offset;
- conf->expanded_size = expanded_size;
- conf->aligned_offset = aligned_offset;
-
- conf->off_in_head = off_in_head;
- conf->off_in_tail = off_in_tail;
- conf->nr_full_blocks = nr_full_blocks;
- conf->acount = acount;
- /*
- * Finally, calculate precise amount of
- * "extra-bytes" that should be uptodated
- * at the end.
- * Only if RMW is expected.
- */
- if (off_in_tail && set_gap)
- set_gap_at_end(frame, object, conf, dtype);
-}
-
-struct data_cipher_alg data_cipher_algs[LAST_CIPHER_ALG][LAST_CIPHER_MODE] = {
- [AES_CIPHER_ALG][XTS_CIPHER_MODE] = {.atomic = _gf_true,
- .should_pad = _gf_true,
- .blkbits = AES_BLOCK_BITS,
- .init = aes_xts_init,
- .set_private = set_private_aes_xts,
- .check_key = check_key_aes_xts,
- .set_iv = set_iv_aes_xts,
- .encrypt = encrypt_aes_xts}};
-
-/*
- Local variables:
- c-indentation-style: "K&R"
- mode-name: "LC"
- c-basic-offset: 8
- tab-width: 8
- fill-column: 80
- scroll-step: 1
- End:
-*/
diff --git a/xlators/encryption/crypt/src/keys.c b/xlators/encryption/crypt/src/keys.c
deleted file mode 100644
index a9357005a36..00000000000
--- a/xlators/encryption/crypt/src/keys.c
+++ /dev/null
@@ -1,284 +0,0 @@
-/*
- Copyright (c) 2008-2013 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-#include "defaults.h"
-#include "crypt-common.h"
-#include "crypt.h"
-
-/* Key hierarchy
-
- +----------------+
- | MASTER_VOL_KEY |
- +-------+--------+
- |
- |
- +----------------+----------------+
- | | |
- | | |
- +-------+------+ +-------+-------+ +------+--------+
- | NMTD_VOL_KEY | | EMTD_FILE_KEY | | DATA_FILE_KEY |
- +-------+------+ +---------------+ +---------------+
- |
- |
- +-------+-------+
- | NMTD_LINK_KEY |
- +---------------+
-
- */
-
-#if DEBUG_CRYPT
-static void
-check_prf_iters(uint32_t num_iters)
-{
- if (num_iters == 0)
- gf_log("crypt", GF_LOG_DEBUG, "bad number of prf iterations : %d",
- num_iters);
-}
-#else
-#define check_prf_iters(num_iters) noop
-#endif /* DEBUG_CRYPT */
-
-unsigned char crypt_fake_oid[16] = {0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0};
-
-/*
- * derive key in the counter mode using
- * sha256-based HMAC as PRF, see
- * NIST Special Publication 800-108, 5.1)
- */
-
-#define PRF_OUTPUT_SIZE SHA256_DIGEST_LENGTH
-
-static int32_t
-kderive_init(struct kderive_context *ctx,
- const unsigned char *pkey, /* parent key */
- uint32_t pkey_size, /* parent key size */
- const unsigned char *idctx, /* id-context */
- uint32_t idctx_size, crypt_key_type type /* type of child key */)
-{
- unsigned char *pos;
- uint32_t llen = strlen(crypt_keys[type].label);
- /*
- * Compoud the fixed input data for KDF:
- * [i]_2 || Label || 0x00 || Id-Context || [L]_2),
- * NIST SP 800-108, 5.1
- */
- ctx->fid_len = sizeof(uint32_t) + llen + 1 + idctx_size + sizeof(uint32_t);
-
- ctx->fid = GF_CALLOC(ctx->fid_len, 1, gf_crypt_mt_key);
- if (!ctx->fid)
- return ENOMEM;
- ctx->out_len = round_up(crypt_keys[type].len >> 3, PRF_OUTPUT_SIZE);
- ctx->out = GF_CALLOC(ctx->out_len, 1, gf_crypt_mt_key);
- if (!ctx->out) {
- GF_FREE(ctx->fid);
- return ENOMEM;
- }
- ctx->pkey = pkey;
- ctx->pkey_len = pkey_size;
- ctx->ckey_len = crypt_keys[type].len;
-
- pos = ctx->fid;
-
- /* counter will be set up in kderive_rfn() */
- pos += sizeof(uint32_t);
-
- memcpy(pos, crypt_keys[type].label, llen);
- pos += llen;
-
- /* set up zero octet */
- *pos = 0;
- pos += 1;
-
- memcpy(pos, idctx, idctx_size);
- pos += idctx_size;
-
- *((uint32_t *)pos) = htobe32(ctx->ckey_len);
-
- return 0;
-}
-
-static void
-kderive_update(struct kderive_context *ctx)
-{
- uint32_t i;
-#if (OPENSSL_VERSION_NUMBER < 0x1010002f)
- HMAC_CTX hctx;
-#endif
- HMAC_CTX *phctx = NULL;
- unsigned char *pos = ctx->out;
- uint32_t *p_iter = (uint32_t *)ctx->fid;
- uint32_t num_iters = ctx->out_len / PRF_OUTPUT_SIZE;
-
- check_prf_iters(num_iters);
-
-#if (OPENSSL_VERSION_NUMBER < 0x1010002f)
- HMAC_CTX_init(&hctx);
- phctx = &hctx;
-#else
- phctx = HMAC_CTX_new();
- /* I guess we presume it was successful? */
-#endif
- for (i = 0; i < num_iters; i++) {
- /*
- * update the iteration number in the fid
- */
- *p_iter = htobe32(i);
- HMAC_Init_ex(phctx, ctx->pkey, ctx->pkey_len >> 3, EVP_sha256(), NULL);
- HMAC_Update(phctx, ctx->fid, ctx->fid_len);
- HMAC_Final(phctx, pos, NULL);
-
- pos += PRF_OUTPUT_SIZE;
- }
-#if (OPENSSL_VERSION_NUMBER < 0x1010002f)
- HMAC_CTX_cleanup(phctx);
-#else
- HMAC_CTX_free(phctx);
-#endif
-}
-
-static void
-kderive_final(struct kderive_context *ctx, unsigned char *child)
-{
- memcpy(child, ctx->out, ctx->ckey_len >> 3);
- GF_FREE(ctx->fid);
- GF_FREE(ctx->out);
- memset(ctx, 0, sizeof(*ctx));
-}
-
-/*
- * derive per-volume key for object ids aithentication
- */
-int32_t
-get_nmtd_vol_key(struct master_cipher_info *master)
-{
- int32_t ret;
- struct kderive_context ctx;
-
- ret = kderive_init(&ctx, master->m_key, master_key_size(), crypt_fake_oid,
- sizeof(uuid_t), NMTD_VOL_KEY);
- if (ret)
- return ret;
- kderive_update(&ctx);
- kderive_final(&ctx, master->m_nmtd_key);
- return 0;
-}
-
-/*
- * derive per-link key for aithentication of non-encrypted
- * meta-data (nmtd)
- */
-int32_t
-get_nmtd_link_key(loc_t *loc, struct master_cipher_info *master,
- unsigned char *result)
-{
- int32_t ret;
- struct kderive_context ctx;
-
- ret = kderive_init(&ctx, master->m_nmtd_key, nmtd_vol_key_size(),
- (const unsigned char *)loc->path, strlen(loc->path),
- NMTD_LINK_KEY);
- if (ret)
- return ret;
- kderive_update(&ctx);
- kderive_final(&ctx, result);
- return 0;
-}
-
-/*
- * derive per-file key for encryption and authentication
- * of encrypted part of metadata (emtd)
- */
-int32_t
-get_emtd_file_key(struct crypt_inode_info *info,
- struct master_cipher_info *master, unsigned char *result)
-{
- int32_t ret;
- struct kderive_context ctx;
-
- ret = kderive_init(&ctx, master->m_key, master_key_size(), info->oid,
- sizeof(uuid_t), EMTD_FILE_KEY);
- if (ret)
- return ret;
- kderive_update(&ctx);
- kderive_final(&ctx, result);
- return 0;
-}
-
-static int32_t
-data_key_type_by_size(uint32_t keysize, crypt_key_type *type)
-{
- int32_t ret = 0;
- switch (keysize) {
- case 256:
- *type = DATA_FILE_KEY_256;
- break;
- case 512:
- *type = DATA_FILE_KEY_512;
- break;
- default:
- gf_log("crypt", GF_LOG_ERROR, "Unsupported data key size %d",
- keysize);
- ret = ENOTSUP;
- break;
- }
- return ret;
-}
-
-/*
- * derive per-file key for data encryption
- */
-int32_t
-get_data_file_key(struct crypt_inode_info *info,
- struct master_cipher_info *master, uint32_t keysize,
- unsigned char *key)
-{
- int32_t ret;
- struct kderive_context ctx;
- crypt_key_type type;
-
- ret = data_key_type_by_size(keysize, &type);
- if (ret)
- return ret;
- ret = kderive_init(&ctx, master->m_key, master_key_size(), info->oid,
- sizeof(uuid_t), type);
- if (ret)
- return ret;
- kderive_update(&ctx);
- kderive_final(&ctx, key);
- return 0;
-}
-
-/*
- * NOTE: Don't change existing keys: it will break compatibility;
- */
-struct crypt_key crypt_keys[LAST_KEY_TYPE] = {
- [MASTER_VOL_KEY] =
- {
- .len = MASTER_VOL_KEY_SIZE << 3,
- .label = "volume-master",
- },
- [NMTD_VOL_KEY] = {.len = NMTD_VOL_KEY_SIZE << 3,
- .label = "volume-nmtd-key-generation"},
- [NMTD_LINK_KEY] = {.len = 128, .label = "link-nmtd-authentication"},
- [EMTD_FILE_KEY] = {.len = 128, .label = "file-emtd-encryption-and-auth"},
- [DATA_FILE_KEY_256] = {.len = 256, .label = "file-data-encryption-256"},
- [DATA_FILE_KEY_512] = {.len = 512, .label = "file-data-encryption-512"}};
-
-/*
- Local variables:
- c-indentation-style: "K&R"
- mode-name: "LC"
- c-basic-offset: 8
- tab-width: 8
- fill-column: 80
- scroll-step: 1
- End:
-*/
diff --git a/xlators/encryption/crypt/src/metadata.c b/xlators/encryption/crypt/src/metadata.c
deleted file mode 100644
index 90c53a9f721..00000000000
--- a/xlators/encryption/crypt/src/metadata.c
+++ /dev/null
@@ -1,575 +0,0 @@
-/*
- Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-#include "defaults.h"
-#include "crypt-common.h"
-#include "crypt.h"
-#include "metadata.h"
-
-int32_t
-alloc_format(crypt_local_t *local, size_t size)
-{
- if (size > 0) {
- local->format = GF_CALLOC(1, size, gf_crypt_mt_mtd);
- if (!local->format)
- return ENOMEM;
- }
- local->format_size = size;
- return 0;
-}
-
-int32_t
-alloc_format_create(crypt_local_t *local)
-{
- return alloc_format(local, new_format_size());
-}
-
-void
-free_format(crypt_local_t *local)
-{
- GF_FREE(local->format);
-}
-
-/*
- * Check compatibility with extracted metadata
- */
-static int32_t
-check_file_metadata(struct crypt_inode_info *info)
-{
- struct object_cipher_info *object = &info->cinfo;
-
- if (info->nr_minor != CRYPT_XLATOR_ID) {
- gf_log("crypt", GF_LOG_WARNING, "unsupported minor subversion %d",
- info->nr_minor);
- return EINVAL;
- }
- if (object->o_alg > LAST_CIPHER_ALG) {
- gf_log("crypt", GF_LOG_WARNING, "unsupported cipher algorithm %d",
- object->o_alg);
- return EINVAL;
- }
- if (object->o_mode > LAST_CIPHER_MODE) {
- gf_log("crypt", GF_LOG_WARNING, "unsupported cipher mode %d",
- object->o_mode);
- return EINVAL;
- }
- if (object->o_block_bits < CRYPT_MIN_BLOCK_BITS ||
- object->o_block_bits > CRYPT_MAX_BLOCK_BITS) {
- gf_log("crypt", GF_LOG_WARNING, "unsupported block bits %d",
- object->o_block_bits);
- return EINVAL;
- }
- /* TBD: check data key size */
- return 0;
-}
-
-static size_t
-format_size_v1(mtd_op_t op, size_t old_size)
-{
- switch (op) {
- case MTD_CREATE:
- return sizeof(struct mtd_format_v1);
- case MTD_OVERWRITE:
- return old_size;
- case MTD_APPEND:
- return old_size + NMTD_8_MAC_SIZE;
- case MTD_CUT:
- if (old_size > sizeof(struct mtd_format_v1))
- return old_size - NMTD_8_MAC_SIZE;
- else
- return 0;
- default:
- gf_log("crypt", GF_LOG_WARNING, "Bad mtd operation");
- return 0;
- }
-}
-
-/*
- * Calculate size of the updated format string.
- * Returned zero means that we don't need to update the format string.
- */
-size_t
-format_size(mtd_op_t op, size_t old_size)
-{
- size_t versioned;
-
- versioned = mtd_loaders[current_mtd_loader()].format_size(
- op, old_size - sizeof(struct crypt_format));
- if (versioned != 0)
- return versioned + sizeof(struct crypt_format);
- return 0;
-}
-
-/*
- * size of the format string of newly created file (nr_links = 1)
- */
-size_t
-new_format_size(void)
-{
- return format_size(MTD_CREATE, 0);
-}
-
-/*
- * Calculate per-link MAC by pathname
- */
-static int32_t
-calc_link_mac_v1(struct mtd_format_v1 *fmt, loc_t *loc, unsigned char *result,
- struct crypt_inode_info *info,
- struct master_cipher_info *master)
-{
- int32_t ret;
- unsigned char nmtd_link_key[16];
- CMAC_CTX *cctx;
- size_t len;
-
- ret = get_nmtd_link_key(loc, master, nmtd_link_key);
- if (ret) {
- gf_log("crypt", GF_LOG_ERROR, "Can not get nmtd link key");
- return -1;
- }
- cctx = CMAC_CTX_new();
- if (!cctx) {
- gf_log("crypt", GF_LOG_ERROR, "CMAC_CTX_new failed");
- return -1;
- }
- ret = CMAC_Init(cctx, nmtd_link_key, sizeof(nmtd_link_key),
- EVP_aes_128_cbc(), 0);
- if (!ret) {
- gf_log("crypt", GF_LOG_ERROR, "CMAC_Init failed");
- CMAC_CTX_free(cctx);
- return -1;
- }
- ret = CMAC_Update(cctx, get_NMTD_V1(info), SIZE_OF_NMTD_V1);
- if (!ret) {
- gf_log("crypt", GF_LOG_ERROR, "CMAC_Update failed");
- CMAC_CTX_free(cctx);
- return -1;
- }
- ret = CMAC_Final(cctx, result, &len);
- CMAC_CTX_free(cctx);
- if (!ret) {
- gf_log("crypt", GF_LOG_ERROR, "CMAC_Final failed");
- return -1;
- }
- return 0;
-}
-
-/*
- * Create per-link MAC of index @idx by pathname
- */
-static int32_t
-create_link_mac_v1(struct mtd_format_v1 *fmt, uint32_t idx, loc_t *loc,
- struct crypt_inode_info *info,
- struct master_cipher_info *master)
-{
- int32_t ret;
- unsigned char *mac;
- unsigned char cmac[16];
-
- mac = get_NMTD_V1_MAC(fmt) + idx * SIZE_OF_NMTD_V1_MAC;
-
- ret = calc_link_mac_v1(fmt, loc, cmac, info, master);
- if (ret)
- return -1;
- memcpy(mac, cmac, SIZE_OF_NMTD_V1_MAC);
- return 0;
-}
-
-static int32_t
-create_format_v1(unsigned char *wire, loc_t *loc, struct crypt_inode_info *info,
- struct master_cipher_info *master)
-{
- int32_t ret;
- struct mtd_format_v1 *fmt;
- unsigned char mtd_key[16];
- AES_KEY EMTD_KEY;
- unsigned char nmtd_link_key[16];
- uint32_t ad;
- GCM128_CONTEXT *gctx;
-
- fmt = (struct mtd_format_v1 *)wire;
-
- fmt->minor_id = info->nr_minor;
- fmt->alg_id = AES_CIPHER_ALG;
- fmt->dkey_factor = master->m_dkey_size >> KEY_FACTOR_BITS;
- fmt->block_bits = master->m_block_bits;
- fmt->mode_id = master->m_mode;
- /*
- * retrieve keys for the parts of metadata
- */
- ret = get_emtd_file_key(info, master, mtd_key);
- if (ret)
- return ret;
- ret = get_nmtd_link_key(loc, master, nmtd_link_key);
- if (ret)
- return ret;
-
- AES_set_encrypt_key(mtd_key, sizeof(mtd_key) * 8, &EMTD_KEY);
-
- gctx = CRYPTO_gcm128_new(&EMTD_KEY, (block128_f)AES_encrypt);
-
- /* TBD: Check return values */
-
- CRYPTO_gcm128_setiv(gctx, info->oid, sizeof(uuid_t));
-
- ad = htole32(MTD_LOADER_V1);
- ret = CRYPTO_gcm128_aad(gctx, (const unsigned char *)&ad, sizeof(ad));
- if (ret) {
- gf_log("crypt", GF_LOG_ERROR, " CRYPTO_gcm128_aad failed");
- CRYPTO_gcm128_release(gctx);
- return ret;
- }
- ret = CRYPTO_gcm128_encrypt(gctx, get_EMTD_V1(fmt), get_EMTD_V1(fmt),
- SIZE_OF_EMTD_V1);
- if (ret) {
- gf_log("crypt", GF_LOG_ERROR, " CRYPTO_gcm128_encrypt failed");
- CRYPTO_gcm128_release(gctx);
- return ret;
- }
- /*
- * set MAC of encrypted part of metadata
- */
- CRYPTO_gcm128_tag(gctx, get_EMTD_V1_MAC(fmt), SIZE_OF_EMTD_V1_MAC);
- CRYPTO_gcm128_release(gctx);
- /*
- * set the first MAC of non-encrypted part of metadata
- */
- return create_link_mac_v1(fmt, 0, loc, info, master);
-}
-
-/*
- * Called by fops:
- * ->create();
- * ->link();
- *
- * Pack common and version-specific parts of file's metadata
- * Pre-conditions: @info contains valid object-id.
- */
-int32_t
-create_format(unsigned char *wire, loc_t *loc, struct crypt_inode_info *info,
- struct master_cipher_info *master)
-{
- struct crypt_format *fmt = (struct crypt_format *)wire;
-
- fmt->loader_id = current_mtd_loader();
-
- wire += sizeof(struct crypt_format);
- return mtd_loaders[current_mtd_loader()].create_format(wire, loc, info,
- master);
-}
-
-/*
- * Append or overwrite per-link mac of @mac_idx index
- * in accordance with the new pathname
- */
-int32_t
-appov_link_mac_v1(unsigned char *new, unsigned char *old, uint32_t old_size,
- int32_t mac_idx, loc_t *loc, struct crypt_inode_info *info,
- struct master_cipher_info *master, crypt_local_t *local)
-{
- memcpy(new, old, old_size);
- return create_link_mac_v1((struct mtd_format_v1 *)new, mac_idx, loc, info,
- master);
-}
-
-/*
- * Cut per-link mac of @mac_idx index
- */
-static int32_t
-cut_link_mac_v1(unsigned char *new, unsigned char *old, uint32_t old_size,
- int32_t mac_idx, loc_t *loc, struct crypt_inode_info *info,
- struct master_cipher_info *master, crypt_local_t *local)
-{
- memcpy(new, old,
- sizeof(struct mtd_format_v1) + NMTD_8_MAC_SIZE * (mac_idx - 1));
-
- memcpy(
- new + sizeof(struct mtd_format_v1) + NMTD_8_MAC_SIZE *(mac_idx - 1),
- old + sizeof(struct mtd_format_v1) + NMTD_8_MAC_SIZE * mac_idx,
- old_size - (sizeof(struct mtd_format_v1) + NMTD_8_MAC_SIZE * mac_idx));
- return 0;
-}
-
-int32_t
-update_format_v1(unsigned char *new, unsigned char *old, size_t old_len,
- int32_t mac_idx, /* of old name */
- mtd_op_t op, loc_t *loc, struct crypt_inode_info *info,
- struct master_cipher_info *master, crypt_local_t *local)
-{
- switch (op) {
- case MTD_APPEND:
- mac_idx = 1 + (old_len - sizeof(struct mtd_format_v1)) / 8;
- case MTD_OVERWRITE:
- return appov_link_mac_v1(new, old, old_len, mac_idx, loc, info,
- master, local);
- case MTD_CUT:
- return cut_link_mac_v1(new, old, old_len, mac_idx, loc, info,
- master, local);
- default:
- gf_log("crypt", GF_LOG_ERROR, "Bad mtd operation %d", op);
- return -1;
- }
-}
-
-/*
- * Called by fops:
- *
- * ->link()
- * ->unlink()
- * ->rename()
- *
- */
-int32_t
-update_format(unsigned char *new, unsigned char *old, size_t old_len,
- int32_t mac_idx, mtd_op_t op, loc_t *loc,
- struct crypt_inode_info *info, struct master_cipher_info *master,
- crypt_local_t *local)
-{
- if (!new)
- return 0;
- memcpy(new, old, sizeof(struct crypt_format));
-
- old += sizeof(struct crypt_format);
- new += sizeof(struct crypt_format);
- old_len -= sizeof(struct crypt_format);
-
- return mtd_loaders[current_mtd_loader()].update_format(
- new, old, old_len, mac_idx, op, loc, info, master, local);
-}
-
-/*
- * Perform preliminary checks of found metadata
- * Return < 0 on errors;
- * Return number of object-id MACs (>= 1) on success
- */
-int32_t
-check_format_v1(uint32_t len, unsigned char *wire)
-{
- uint32_t nr_links;
-
- if (len < sizeof(struct mtd_format_v1)) {
- gf_log("crypt", GF_LOG_ERROR, "v1-loader: bad metadata size %d", len);
- goto error;
- }
- len -= sizeof(struct mtd_format_v1);
- if (len % sizeof(nmtd_8_mac_t)) {
- gf_log("crypt", GF_LOG_ERROR, "v1-loader: bad metadata format");
- goto error;
- }
- nr_links = 1 + len / sizeof(nmtd_8_mac_t);
- if (nr_links > _POSIX_LINK_MAX)
- goto error;
- return nr_links;
-error:
- return EIO;
-}
-
-/*
- * Verify per-link MAC specified by index @idx
- *
- * return:
- * -1 on errors;
- * 0 on failed verification;
- * 1 on successful verification
- */
-static int32_t
-verify_link_mac_v1(struct mtd_format_v1 *fmt,
- uint32_t idx /* index of the mac to verify */, loc_t *loc,
- struct crypt_inode_info *info,
- struct master_cipher_info *master)
-{
- int32_t ret;
- unsigned char *mac;
- unsigned char cmac[16];
-
- mac = get_NMTD_V1_MAC(fmt) + idx * SIZE_OF_NMTD_V1_MAC;
-
- ret = calc_link_mac_v1(fmt, loc, cmac, info, master);
- if (ret)
- return -1;
- if (memcmp(cmac, mac, SIZE_OF_NMTD_V1_MAC))
- return 0;
- return 1;
-}
-
-/*
- * Lookup per-link MAC by pathname.
- *
- * return index of the MAC, if it was found;
- * return < 0 on errors, or if the MAC wasn't found
- */
-static int32_t
-lookup_link_mac_v1(struct mtd_format_v1 *fmt, uint32_t nr_macs, loc_t *loc,
- struct crypt_inode_info *info,
- struct master_cipher_info *master)
-{
- int32_t ret;
- uint32_t idx;
-
- for (idx = 0; idx < nr_macs; idx++) {
- ret = verify_link_mac_v1(fmt, idx, loc, info, master);
- if (ret < 0)
- return ret;
- if (ret > 0)
- return idx;
- }
- return -ENOENT;
-}
-
-/*
- * Extract version-specific part of metadata
- */
-static int32_t
-open_format_v1(unsigned char *wire, int32_t len, loc_t *loc,
- struct crypt_inode_info *info, struct master_cipher_info *master,
- crypt_local_t *local, gf_boolean_t load_info)
-{
- int32_t ret;
- int32_t num_nmtd_macs;
- struct mtd_format_v1 *fmt;
- unsigned char mtd_key[16];
- AES_KEY EMTD_KEY;
- GCM128_CONTEXT *gctx;
- uint32_t ad;
- emtd_8_mac_t gmac;
- struct object_cipher_info *object;
-
- num_nmtd_macs = check_format_v1(len, wire);
- if (num_nmtd_macs <= 0)
- return EIO;
-
- ret = lookup_link_mac_v1((struct mtd_format_v1 *)wire, num_nmtd_macs, loc,
- info, master);
- if (ret < 0) {
- gf_log("crypt", GF_LOG_ERROR, "NMTD verification failed");
- return EINVAL;
- }
-
- local->mac_idx = ret;
- if (load_info == _gf_false)
- /* the case of partial open */
- return 0;
-
- fmt = GF_MALLOC(len, gf_crypt_mt_mtd);
- if (!fmt)
- return ENOMEM;
- memcpy(fmt, wire, len);
-
- object = &info->cinfo;
-
- ret = get_emtd_file_key(info, master, mtd_key);
- if (ret) {
- gf_log("crypt", GF_LOG_ERROR, "Can not retrieve metadata key");
- goto out;
- }
- /*
- * decrypt encrypted meta-data
- */
- ret = AES_set_encrypt_key(mtd_key, sizeof(mtd_key) * 8, &EMTD_KEY);
- if (ret < 0) {
- gf_log("crypt", GF_LOG_ERROR, "Can not set encrypt key");
- ret = EIO;
- goto out;
- }
- gctx = CRYPTO_gcm128_new(&EMTD_KEY, (block128_f)AES_encrypt);
- if (!gctx) {
- gf_log("crypt", GF_LOG_ERROR, "Can not alloc gcm context");
- ret = ENOMEM;
- goto out;
- }
- CRYPTO_gcm128_setiv(gctx, info->oid, sizeof(uuid_t));
-
- ad = htole32(MTD_LOADER_V1);
- ret = CRYPTO_gcm128_aad(gctx, (const unsigned char *)&ad, sizeof(ad));
- if (ret) {
- gf_log("crypt", GF_LOG_ERROR, " CRYPTO_gcm128_aad failed");
- CRYPTO_gcm128_release(gctx);
- ret = EIO;
- goto out;
- }
- ret = CRYPTO_gcm128_decrypt(gctx, get_EMTD_V1(fmt), get_EMTD_V1(fmt),
- SIZE_OF_EMTD_V1);
- if (ret) {
- gf_log("crypt", GF_LOG_ERROR, " CRYPTO_gcm128_decrypt failed");
- CRYPTO_gcm128_release(gctx);
- ret = EIO;
- goto out;
- }
- /*
- * verify metadata
- */
- CRYPTO_gcm128_tag(gctx, gmac, sizeof(gmac));
- CRYPTO_gcm128_release(gctx);
- if (memcmp(gmac, get_EMTD_V1_MAC(fmt), SIZE_OF_EMTD_V1_MAC)) {
- gf_log("crypt", GF_LOG_ERROR, "EMTD verification failed");
- ret = EINVAL;
- goto out;
- }
- /*
- * load verified metadata to the private part of inode
- */
- info->nr_minor = fmt->minor_id;
-
- object->o_alg = fmt->alg_id;
- object->o_dkey_size = fmt->dkey_factor << KEY_FACTOR_BITS;
- object->o_block_bits = fmt->block_bits;
- object->o_mode = fmt->mode_id;
-
- ret = check_file_metadata(info);
-out:
- GF_FREE(fmt);
- return ret;
-}
-
-/*
- * perform metadata authentication against @loc->path;
- * extract crypt-specific attribute and populate @info
- * with them (optional)
- */
-int32_t
-open_format(unsigned char *str, int32_t len, loc_t *loc,
- struct crypt_inode_info *info, struct master_cipher_info *master,
- crypt_local_t *local, gf_boolean_t load_info)
-{
- struct crypt_format *fmt;
- if (len < sizeof(*fmt)) {
- gf_log("crypt", GF_LOG_ERROR, "Bad core format");
- return EIO;
- }
- fmt = (struct crypt_format *)str;
-
- if (fmt->loader_id >= LAST_MTD_LOADER) {
- gf_log("crypt", GF_LOG_ERROR, "Unsupported loader id %d",
- fmt->loader_id);
- return EINVAL;
- }
- str += sizeof(*fmt);
- len -= sizeof(*fmt);
-
- return mtd_loaders[fmt->loader_id].open_format(str, len, loc, info, master,
- local, load_info);
-}
-
-struct crypt_mtd_loader mtd_loaders[LAST_MTD_LOADER] = {
- [MTD_LOADER_V1] = {.format_size = format_size_v1,
- .create_format = create_format_v1,
- .open_format = open_format_v1,
- .update_format = update_format_v1}};
-
-/*
- Local variables:
- c-indentation-style: "K&R"
- mode-name: "LC"
- c-basic-offset: 8
- tab-width: 8
- fill-column: 80
- scroll-step: 1
- End:
-*/
diff --git a/xlators/encryption/crypt/src/metadata.h b/xlators/encryption/crypt/src/metadata.h
deleted file mode 100644
index 0bcee1b18c8..00000000000
--- a/xlators/encryption/crypt/src/metadata.h
+++ /dev/null
@@ -1,79 +0,0 @@
-/*
- Copyright (c) 2008-2013 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-#ifndef __METADATA_H__
-#define __METADATA_H__
-
-#define NMTD_8_MAC_SIZE (8)
-#define EMTD_8_MAC_SIZE (8)
-
-typedef uint8_t nmtd_8_mac_t[NMTD_8_MAC_SIZE];
-typedef uint8_t emtd_8_mac_t[EMTD_8_MAC_SIZE];
-
-/*
- * Version "v1" of file's metadata.
- * Metadata of this version has 4 components:
- *
- * 1) EMTD (Encrypted part of MeTaData);
- * 2) NMTD (Non-encrypted part of MeTaData);
- * 3) EMTD_MAC; (EMTD Message Authentication Code);
- * 4) Array of per-link NMTD MACs (for every (hard)link it includes
- * exactly one MAC)
- */
-struct mtd_format_v1 {
- /* EMTD, encrypted part of meta-data */
- uint8_t alg_id; /* cipher algorithm id (only AES for now) */
- uint8_t mode_id; /* cipher mode id; (only XTS for now) */
- uint8_t block_bits; /* encoded block size */
- uint8_t minor_id; /* client translator id */
- uint8_t dkey_factor; /* encoded size of the data key */
- /* MACs */
- emtd_8_mac_t gmac; /* MAC of the encrypted meta-data, 8 bytes */
- nmtd_8_mac_t omac; /* per-link MACs of the non-encrypted
- * meta-data: at least one such MAC is always
- * present */
-} __attribute__((packed));
-
-/*
- * NMTD, the non-encrypted part of metadata of version "v1"
- * is file's gfid, which is generated on trusted machines.
- */
-#define SIZE_OF_NMTD_V1 (sizeof(uuid_t))
-#define SIZE_OF_EMTD_V1 \
- (offsetof(struct mtd_format_v1, gmac) - \
- offsetof(struct mtd_format_v1, alg_id))
-#define SIZE_OF_NMTD_V1_MAC (NMTD_8_MAC_SIZE)
-#define SIZE_OF_EMTD_V1_MAC (EMTD_8_MAC_SIZE)
-
-static inline unsigned char *
-get_EMTD_V1(struct mtd_format_v1 *format)
-{
- return &format->alg_id;
-}
-
-static inline unsigned char *
-get_NMTD_V1(struct crypt_inode_info *info)
-{
- return info->oid;
-}
-
-static inline unsigned char *
-get_EMTD_V1_MAC(struct mtd_format_v1 *format)
-{
- return format->gmac;
-}
-
-static inline unsigned char *
-get_NMTD_V1_MAC(struct mtd_format_v1 *format)
-{
- return format->omac;
-}
-
-#endif /* __METADATA_H__ */
diff --git a/xlators/encryption/rot-13/Makefile.am b/xlators/encryption/rot-13/Makefile.am
deleted file mode 100644
index d471a3f9243..00000000000
--- a/xlators/encryption/rot-13/Makefile.am
+++ /dev/null
@@ -1,3 +0,0 @@
-SUBDIRS = src
-
-CLEANFILES =
diff --git a/xlators/experimental/Makefile.am b/xlators/experimental/Makefile.am
deleted file mode 100644
index a530845c4c0..00000000000
--- a/xlators/experimental/Makefile.am
+++ /dev/null
@@ -1,3 +0,0 @@
-SUBDIRS = jbr-client jbr-server fdl dht2 posix2
-
-CLEANFILES =
diff --git a/xlators/experimental/README.md b/xlators/experimental/README.md
deleted file mode 100644
index b00f24e114b..00000000000
--- a/xlators/experimental/README.md
+++ /dev/null
@@ -1,107 +0,0 @@
-# Purpose of this directory
-
-This directory is created to host experimental gluster translators. A new
-translator that is *experimental* in nature, would need to create its own
-subdirectory under this directory, to host/publish its work.
-
-Example:
- The first commit should include the following changes
- 1. xlators/experimental/Makefile.am
- NOTE: Add foobar to the list of SUBDIRS here
- 2. xlators/experimental/foobar
- 3. xlators/experimental/foobar/Makefle.am
- NOTE: Can be empty initially in the first commit
- 4. configure.ac
- NOTE: Include your experimental Makefile under AC_CONFIG_FILES
- 5. xlators/experimental/foobar/README.md
- NOTE: The readme should cover details as required for the translator to be
- accepted as experimental, primarily including a link to the specification
- under the gluster-specs repository [1]. Later the readme should suffice
- as an entry point for developers and users alike, who wish to experiment
- with the xlator under development
- 6. xlators/experimental/foobar/TODO.md
- NOTE: This is a list of TODO's identified during the development process
- that needs addressing over time. These include exceptions granted during
- the review process, for things not addressed when commits are merged into
- the repository
-
-# Why is it provided
-
-Quite often translator development that happens out of tree, does not get
-enough eyeballs early in its development phase, has not undergone CI
-(regression/continuous integration testing), and at times is not well integrated
-with the rest of gluster stack.
-
-Also, when such out of tree translators are submitted for acceptance, it is a
-bulk commit that makes review difficult and inefficient. Such submissions also
-have to be merged forward, and depending on the time spent in developing the
-translator the master branch could have moved far ahead, making this a painful
-activity.
-
-Experimental is born out of such needs, to provide xlator developers,
- - Early access to CI
- - Ability to adapt to ongoing changes in other parts of gluster
- - More eye balls on the code and design aspects of the translator
- - TBD: What else?
-
-and for maintainers,
- - Ability to look at smaller change sets in the review process
- - Ability to verify/check implementation against the specification provided
-
-# General rules
-
-1. If a new translator is added under here it should, at the very least, pass
-compilation.
-
-2. All translators under the experimental directory are shipped as a part of
-gluster-experimental RPMs.
-TBD: Spec file and other artifacts for the gluster-experimental RPM needs to be
-fleshed out.
-
-3. Experimental translators can leverage the CI framework as needed. Tests need
-to be hosted under xlators/experimental/tests initially, and later moved to the
-appropriate tests/ directory as the xlator matures. It is encouraged to provide
-tests for each commit or series of commits, so that code and tests can be
-inspected together.
-
-4. If any experimental translator breaks CI, it is quarantined till demonstrable
-proof towards the contrary is provided. This is applicable as tests are moved
-out of experimental tests directory to the CI framework directory, as otherwise
-experimental tests are not a part of regular CI regression runs.
-
-5. An experimental translator need not function at all, as a result commits can
-be merged pretty much at will as long as other rules as stated are not violated.
-
-6. Experimental submissions will be assigned a existing maintainer, to aid
-merging commits and ensure aspects of gluster code submissions are respected.
-When an experimental xlator is proposed and the first commit posted
-a mail to gluster-devel@gluster.org requesting attention, will assign the
-maintainer buddy for the submission.
-NOTE: As we scale, this may change.
-
-6. More?
-
-# Getting out of the experimental jail
-
-So you now think your xlator is ready to leave experimental and become part of
-mainline!
-- TBD: guidelines pending.
-
-# FAQs
-
-1. How do I submit/commit experimental framework changes outside of my
-experimental xlator?
- - Provide such framework changes as a separate commit
- - Conditionally ensure these are built or activated only when the experimental
- feature is activated, so as to prevent normal gluster workflow to function as
- before
- - TBD: guidelines and/or examples pending.
-
-2. Ask your question either on gluster-devel@gluster.org or as a change request
-to this file in gluster gerrit [2] for an answer that will be assimilated into
-this readme.
-
-# Links
-[1] http://review.gluster.org/#/q/project:glusterfs-specs
-
-[2] http://review.gluster.org/#/q/project:glusterfs
diff --git a/xlators/experimental/dht2/Makefile.am b/xlators/experimental/dht2/Makefile.am
deleted file mode 100644
index 9d910a66056..00000000000
--- a/xlators/experimental/dht2/Makefile.am
+++ /dev/null
@@ -1,3 +0,0 @@
-SUBDIRS = dht2-client dht2-server
-
-CLEANFILES =
diff --git a/xlators/experimental/dht2/README.md b/xlators/experimental/dht2/README.md
deleted file mode 100644
index 8f249a83673..00000000000
--- a/xlators/experimental/dht2/README.md
+++ /dev/null
@@ -1,47 +0,0 @@
-# DHT2 Experimental README
-
-DHT2 is the new distribution scheme being developed for Gluster, that
-aims to remove the subdirectory spread across all DHT subvolumes.
-
-As a result of this work, the Gluster backend file layouts and on disk
-representation of directories and files are modified, thus making DHT2
-volumes incompatible to existing DHT based Gluster deployments.
-
-This document presents interested users with relevant data to play around
-with DHT2 volumes and provide feedback towards the same.
-
-REMOVEME: Design details currently under review here,
- - http://review.gluster.org/#/c/13395/
-
-TODO: Add more information as relevant code is pulled in
-
-# Directory strucutre elaborated
-
-## dht2-server
-This directory contains code for the server side DHT2 xlator. This xlator is
-intended to run on the brick graph, and is responsible for FOP synchronization,
-redirection, transactions, and journal replays.
-
-NOTE: The server side code also handles changes to volume/cluster map and
-also any rebalance activities.
-
-## dht2-client
-This directory contains code for the client side DHT2 xlator. This xlator is
-intended to run on the client/access protocol/mount graph, and is responsible
-for FOP routing to the right DHT2 subvolume. It uses a volume/cluster wide map
-of the routing (layout), to achieve the same.
-
-## dht2-common
-This directory contains code that is used in common across other parts of DHT2.
-For example, FOP routing store/consult abstractions that are common across the
-client and server side of DHT2.
-
-## Issue: How to build dht2-common?
- 1. Build a shared object
- - We cannot ship this as a part of both the client xlator RPM
- 2. Build an archive
- - Symbol clashes? when both the client and server xlators are loaded as a
- part of the same graph
- 3. Compile with other parts of the code that needs it
- - Not a very different from (2) above
- - This is what is chosen at present, and maybe would be revised later
diff --git a/xlators/experimental/dht2/TODO.md b/xlators/experimental/dht2/TODO.md
deleted file mode 100644
index 1e2c53c5b36..00000000000
--- a/xlators/experimental/dht2/TODO.md
+++ /dev/null
@@ -1,3 +0,0 @@
-# DHT2 TODO list
-
-<Items will be added as code is pulled into the repository>
diff --git a/xlators/experimental/dht2/dht2-client/src/Makefile.am b/xlators/experimental/dht2/dht2-client/src/Makefile.am
deleted file mode 100644
index 3a13a2a3986..00000000000
--- a/xlators/experimental/dht2/dht2-client/src/Makefile.am
+++ /dev/null
@@ -1,21 +0,0 @@
-xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/experimental
-xlator_LTLIBRARIES = dht2c.la
-
-dht2c_sources = dht2-client-main.c
-
-dht2common_sources = $(top_srcdir)/xlators/experimental/dht2/dht2-common/src/dht2-common-map.c
-
-dht2c_la_SOURCES = $(dht2c_sources) $(dht2common_sources)
-dht2c_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS)
-dht2c_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
-
-AM_CFLAGS = -Wall $(GF_CFLAGS)
-
-AM_CPPFLAGS = $(GF_CPPFLAGS)
-AM_CPPFLAGS += -I$(top_srcdir)/xlators/experimental/dht2/dht2-common/src/
-AM_CPPFLAGS += -I$(top_srcdir)/libglusterfs/src
-AM_CPPFLAGS += -I$(top_srcdir)/rpc/xdr/src
-AM_CPPFLAGS += -I$(top_builddir)/rpc/xdr/src
-AM_CPPFLAGS += -I$(top_srcdir)/xlators/lib/src
-
-CLEANFILES =
diff --git a/xlators/experimental/dht2/dht2-client/src/dht2-client-main.c b/xlators/experimental/dht2/dht2-client/src/dht2-client-main.c
deleted file mode 100644
index 556385724a4..00000000000
--- a/xlators/experimental/dht2/dht2-client/src/dht2-client-main.c
+++ /dev/null
@@ -1,58 +0,0 @@
-/*
- Copyright (c) 2016 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-/* File: dht2-client-main.c
- * This file contains the xlator loading functions, FOP entry points
- * and options.
- * The entire functionality including comments is TODO.
- */
-
-#include "glusterfs.h"
-#include "xlator.h"
-#include "logging.h"
-#include "statedump.h"
-
-int32_t
-dht2_client_init(xlator_t *this)
-{
- if (!this->children) {
- gf_log(this->name, GF_LOG_ERROR,
- "Missing children in volume graph, this (%s) is"
- " not a leaf translator",
- this->name);
- return -1;
- }
-
- return 0;
-}
-
-void
-dht2_client_fini(xlator_t *this)
-{
- return;
-}
-
-class_methods_t class_methods = {
- .init = dht2_client_init,
- .fini = dht2_client_fini,
-};
-
-struct xlator_fops fops = {};
-
-struct xlator_cbks cbks = {};
-
-/*
-struct xlator_dumpops dumpops = {
-};
-*/
-
-struct volume_options options[] = {
- {.key = {NULL}},
-};
diff --git a/xlators/experimental/dht2/dht2-common/src/dht2-common-map.c b/xlators/experimental/dht2/dht2-common/src/dht2-common-map.c
deleted file mode 100644
index d959483b8a4..00000000000
--- a/xlators/experimental/dht2/dht2-common/src/dht2-common-map.c
+++ /dev/null
@@ -1,19 +0,0 @@
-/*
- Copyright (c) 2016 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-/* File: dht2-common-map.c
- * This file contains helper routines to store, consult, the volume map
- * for subvolume to GFID relations.
- * The entire functionality including comments is TODO.
- */
-
-#include "glusterfs.h"
-#include "logging.h"
-#include "statedump.h"
diff --git a/xlators/experimental/dht2/dht2-server/src/Makefile.am b/xlators/experimental/dht2/dht2-server/src/Makefile.am
deleted file mode 100644
index c76fab0ca74..00000000000
--- a/xlators/experimental/dht2/dht2-server/src/Makefile.am
+++ /dev/null
@@ -1,23 +0,0 @@
-if WITH_SERVER
-xlator_LTLIBRARIES = dht2s.la
-endif
-xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/experimental
-
-dht2s_sources = dht2-server-main.c
-
-dht2common_sources = $(top_srcdir)/xlators/experimental/dht2/dht2-common/src/dht2-common-map.c
-
-dht2s_la_SOURCES = $(dht2s_sources) $(dht2common_sources)
-dht2s_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS)
-dht2s_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
-
-AM_CFLAGS = -Wall $(GF_CFLAGS)
-
-AM_CPPFLAGS = $(GF_CPPFLAGS)
-AM_CPPFLAGS += -I$(top_srcdir)/xlators/experimental/dht2/dht2-common/src/
-AM_CPPFLAGS += -I$(top_srcdir)/libglusterfs/src
-AM_CPPFLAGS += -I$(top_srcdir)/rpc/xdr/src
-AM_CPPFLAGS += -I$(top_builddir)/rpc/xdr/src
-AM_CPPFLAGS += -I$(top_srcdir)/xlators/lib/src
-
-CLEANFILES =
diff --git a/xlators/experimental/dht2/dht2-server/src/dht2-server-main.c b/xlators/experimental/dht2/dht2-server/src/dht2-server-main.c
deleted file mode 100644
index f051a44e99f..00000000000
--- a/xlators/experimental/dht2/dht2-server/src/dht2-server-main.c
+++ /dev/null
@@ -1,58 +0,0 @@
-/*
- Copyright (c) 2016 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-/* File: dht2-server-main.c
- * This file contains the xlator loading functions, FOP entry points
- * and options.
- * The entire functionality including comments is TODO.
- */
-
-#include "glusterfs.h"
-#include "xlator.h"
-#include "logging.h"
-#include "statedump.h"
-
-int32_t
-dht2_server_init(xlator_t *this)
-{
- if (!this->children) {
- gf_log(this->name, GF_LOG_ERROR,
- "Missing children in volume graph, this (%s) is"
- " not a leaf translator",
- this->name);
- return -1;
- }
-
- return 0;
-}
-
-void
-dht2_server_fini(xlator_t *this)
-{
- return;
-}
-
-class_methods_t class_methods = {
- .init = dht2_server_init,
- .fini = dht2_server_fini,
-};
-
-struct xlator_fops fops = {};
-
-struct xlator_cbks cbks = {};
-
-/*
-struct xlator_dumpops dumpops = {
-};
-*/
-
-struct volume_options options[] = {
- {.key = {NULL}},
-};
diff --git a/xlators/experimental/fdl/Makefile.am b/xlators/experimental/fdl/Makefile.am
deleted file mode 100644
index a985f42a877..00000000000
--- a/xlators/experimental/fdl/Makefile.am
+++ /dev/null
@@ -1,3 +0,0 @@
-SUBDIRS = src
-
-CLEANFILES =
diff --git a/xlators/experimental/fdl/src/Makefile.am b/xlators/experimental/fdl/src/Makefile.am
deleted file mode 100644
index bdcaaf6c38d..00000000000
--- a/xlators/experimental/fdl/src/Makefile.am
+++ /dev/null
@@ -1,48 +0,0 @@
-xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/experimental
-if WITH_SERVER
-xlator_LTLIBRARIES = fdl.la
-endif
-
-noinst_HEADERS = fdl.h
-
-nodist_fdl_la_SOURCES = fdl.c
-fdl_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS)
-fdl_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
-
-if WITH_SERVER
-sbin_PROGRAMS = gf_logdump gf_recon
-endif
-gf_logdump_SOURCES = logdump.c
-nodist_gf_logdump_SOURCES = libfdl.c
-gf_logdump_LDADD = $(top_builddir)/libglusterfs/src/libglusterfs.la \
- $(top_builddir)/api/src/libgfapi.la $(GFAPI_LIBS) $(UUID_LIBS)
-
-# Eventually recon(ciliation) code will move elsewhere, but for now it's
-# easier to have it next to the similar logdump code.
-gf_recon_SOURCES = recon.c
-nodist_gf_recon_SOURCES = librecon.c
-gf_recon_LDADD = $(top_builddir)/libglusterfs/src/libglusterfs.la \
- $(top_builddir)/api/src/libgfapi.la $(GFAPI_LIBS) $(UUID_LIBS)
-
-AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \
- -I$(top_srcdir)/rpc/xdr/src -I$(top_builddir)/rpc/xdr/src \
- -I$(top_srcdir)/api/src -fPIC \
- -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -D$(GF_HOST_OS) \
- -DDATADIR=\"$(localstatedir)\"
-
-AM_CFLAGS = -Wall $(GF_CFLAGS)
-
-noinst_PYTHON = gen_fdl.py gen_dumper.py gen_recon.py
-EXTRA_DIST = fdl-tmpl.c.in dump-tmpl.c.in recon-tmpl.c.in
-
-CLEANFILES = $(nodist_fdl_la_SOURCES) $(nodist_gf_logdump_SOURCES) \
- $(nodist_gf_recon_SOURCES)
-
-fdl.c: fdl-tmpl.c.in gen_fdl.py
- $(PYTHON) $(srcdir)/gen_fdl.py $(srcdir)/fdl-tmpl.c.in > $@
-
-libfdl.c: dump-tmpl.c.in gen_dumper.py
- $(PYTHON) $(srcdir)/gen_dumper.py $(srcdir)/dump-tmpl.c.in > $@
-
-librecon.c: recon-tmpl.c.in gen_recon.py
- $(PYTHON) $(srcdir)/gen_recon.py $(srcdir)/recon-tmpl.c.in > $@
diff --git a/xlators/experimental/fdl/src/dump-tmpl.c.in b/xlators/experimental/fdl/src/dump-tmpl.c.in
deleted file mode 100644
index 97249ac3e71..00000000000
--- a/xlators/experimental/fdl/src/dump-tmpl.c.in
+++ /dev/null
@@ -1,177 +0,0 @@
-#pragma fragment PROLOG
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#include <ctype.h>
-#endif
-
-#include "glfs.h"
-#include "iatt.h"
-#include "xlator.h"
-#include "fdl.h"
-
-/*
- * Returns 0 if the string is ASCII printable *
- * and -1 if it's not ASCII printable *
- */
-int
-str_isprint(char *s)
-{
- int ret = -1;
-
- if (!s)
- goto out;
-
- while (s[0] != '\0') {
- if (!isprint(s[0]))
- goto out;
- else
- s++;
- }
-
- ret = 0;
-out:
- return ret;
-}
-
-#pragma fragment DICT
-{
- int key_len, data_len;
- char *key_ptr;
- char *key_val;
- printf("@ARGNAME@ = dict {\n");
- for (;;) {
- key_len = *((int *)new_meta);
- new_meta += sizeof(int);
- if (!key_len) {
- break;
- }
- key_ptr = new_meta;
- new_meta += key_len;
- data_len = *((int *)new_meta);
- key_val = new_meta + sizeof(int);
- new_meta += sizeof(int) + data_len;
- if (str_isprint(key_val))
- printf(" %s = <%d bytes>\n", key_ptr, data_len);
- else
- printf(" %s = %s <%d bytes>\n", key_ptr, key_val, data_len);
- }
- printf("}\n");
-}
-
-#pragma fragment DOUBLE
-printf("@ARGNAME@ = @FORMAT@\n", *((uint64_t *)new_meta),
- *((uint64_t *)new_meta));
-new_meta += sizeof(uint64_t);
-
-#pragma fragment GFID
-printf("@ARGNAME@ = <gfid %s>\n", uuid_utoa(*((uuid_t *)new_meta)));
-new_meta += 16;
-
-#pragma fragment INTEGER
-printf("@ARGNAME@ = @FORMAT@\n", *((uint32_t *)new_meta),
- *((uint32_t *)new_meta));
-new_meta += sizeof(uint32_t);
-
-#pragma fragment LOC
-printf("@ARGNAME@ = loc {\n");
-printf(" gfid = %s\n", uuid_utoa(*((uuid_t *)new_meta)));
-new_meta += 16;
-printf(" pargfid = %s\n", uuid_utoa(*((uuid_t *)new_meta)));
-new_meta += 16;
-if (*(new_meta++)) {
- printf(" name = %s\n", new_meta);
- new_meta += (strlen(new_meta) + 1);
-}
-printf("}\n");
-
-#pragma fragment STRING
-if (*(new_meta++)) {
- printf("@ARGNAME@ = %s\n", new_meta);
- new_meta += (strlen(new_meta) + 1);
-}
-
-#pragma fragment VECTOR
-{
- size_t len = *((size_t *)new_meta);
- new_meta += sizeof(len);
- printf("@ARGNAME@ = <%zu bytes>\n", len);
- new_data += len;
-}
-
-#pragma fragment IATT
-{
- ia_prot_t *myprot = ((ia_prot_t *)new_meta);
- printf("@ARGNAME@ = iatt {\n");
- printf(" ia_prot = %c%c%c", myprot->suid ? 'S' : '-',
- myprot->sgid ? 'S' : '-', myprot->sticky ? 'T' : '-');
- printf("%c%c%c", myprot->owner.read ? 'r' : '-',
- myprot->owner.write ? 'w' : '-', myprot->owner.exec ? 'x' : '-');
- printf("%c%c%c", myprot->group.read ? 'r' : '-',
- myprot->group.write ? 'w' : '-', myprot->group.exec ? 'x' : '-');
- printf("%c%c%c\n", myprot->other.read ? 'r' : '-',
- myprot->other.write ? 'w' : '-', myprot->other.exec ? 'x' : '-');
- new_meta += sizeof(ia_prot_t);
- uint32_t *myints = (uint32_t *)new_meta;
- printf(" ia_uid = %u\n", myints[0]);
- printf(" ia_gid = %u\n", myints[1]);
- printf(" ia_atime = %u.%09u\n", myints[2], myints[3]);
- printf(" ia_mtime = %u.%09u\n", myints[4], myints[5]);
- new_meta += sizeof(*myints) * 6;
-}
-
-#pragma fragment FOP
-void fdl_dump_@NAME@(char **old_meta, char **old_data)
-{
- char *new_meta = *old_meta;
- char *new_data = *old_data;
-
- /* TBD: word size/endianness */
- @FUNCTION_BODY@
-
- *old_meta = new_meta;
- *old_data = new_data;
-}
-
-#pragma fragment CASE
-case GF_FOP_@UPNAME@:
- printf("=== GF_FOP_@UPNAME@\n");
- fdl_dump_@NAME@(&new_meta, &new_data);
- break;
-
-#pragma fragment EPILOG
- int
- fdl_dump(char **old_meta, char **old_data)
- {
- char *new_meta = *old_meta;
- char *new_data = *old_data;
- static glfs_t *fs = NULL;
- int recognized = 1;
- event_header_t *eh;
-
- /*
- * We don't really call anything else in GFAPI, but this is the most
- * convenient way to satisfy all of the spurious dependencies on how it
- * or glusterfsd initialize (e.g. setting up THIS).
- */
- if (!fs) {
- fs = glfs_new("dummy");
- }
-
- eh = (event_header_t *)new_meta;
- new_meta += sizeof(*eh);
-
- /* TBD: check event_type instead of assuming NEW_REQUEST */
-
- switch (eh->fop_type) {
- @SWITCH_BODY@
-
- default :
- printf("unknown fop %u\n", eh->fop_type);
- recognized = 0;
- }
-
- *old_meta = new_meta;
- *old_data = new_data;
- return recognized;
- }
diff --git a/xlators/experimental/fdl/src/fdl-tmpl.c.in b/xlators/experimental/fdl/src/fdl-tmpl.c.in
deleted file mode 100644
index c99157be957..00000000000
--- a/xlators/experimental/fdl/src/fdl-tmpl.c.in
+++ /dev/null
@@ -1,513 +0,0 @@
-/*
- Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include <fcntl.h>
-#include <unistd.h>
-#include <sys/mman.h>
-#include "call-stub.h"
-#include "iatt.h"
-#include "defaults.h"
-#include "syscall.h"
-#include "xlator.h"
-#include "fdl.h"
-
-/* TBD: make tunable */
-#define META_FILE_SIZE (1 << 20)
-#define DATA_FILE_SIZE (1 << 24)
-
-enum gf_fdl { gf_fdl_mt_fdl_private_t = gf_common_mt_end + 1, gf_fdl_mt_end };
-
-typedef struct {
- char *type;
- off_t size;
- char *path;
- int fd;
- void *ptr;
- off_t max_offset;
-} log_obj_t;
-
-typedef struct {
- struct list_head reqs;
- pthread_mutex_t req_lock;
- pthread_cond_t req_cond;
- char *log_dir;
- pthread_t worker;
- gf_boolean_t should_stop;
- gf_boolean_t change_term;
- log_obj_t meta_log;
- log_obj_t data_log;
- int term;
- int first_term;
-} fdl_private_t;
-
-int32_t
-fdl_ipc(call_frame_t *frame, xlator_t *this, int32_t op, dict_t *xdata);
-
-void
-fdl_enqueue(xlator_t *this, call_stub_t *stub)
-{
- fdl_private_t *priv = this->private;
-
- pthread_mutex_lock(&priv->req_lock);
- list_add_tail(&stub->list, &priv->reqs);
- pthread_mutex_unlock(&priv->req_lock);
-
- pthread_cond_signal(&priv->req_cond);
-}
-
-#pragma generate
-
-char *
-fdl_open_term_log(xlator_t *this, log_obj_t *obj, int term)
-{
- fdl_private_t *priv = this->private;
- int ret;
- char *ptr = NULL;
-
- /*
- * Use .jnl instead of .log so that we don't get test info (mistakenly)
- * appended to our journal files.
- */
- if (this->ctx->cmd_args.log_ident) {
- ret = gf_asprintf(&obj->path, "%s/%s-%s-%d.jnl", priv->log_dir,
- this->ctx->cmd_args.log_ident, obj->type, term);
- } else {
- ret = gf_asprintf(&obj->path, "%s/fubar-%s-%d.jnl", priv->log_dir,
- obj->type, term);
- }
- if ((ret <= 0) || !obj->path) {
- gf_log(this->name, GF_LOG_ERROR, "failed to construct log-file path");
- goto err;
- }
-
- gf_log(this->name, GF_LOG_INFO, "opening %s (size %" PRId64 ")", obj->path,
- obj->size);
-
- obj->fd = open(obj->path, O_RDWR | O_CREAT | O_TRUNC, 0666);
- if (obj->fd < 0) {
- gf_log(this->name, GF_LOG_ERROR, "failed to open log file (%s)",
- strerror(errno));
- goto err;
- }
-
-#if !defined(GF_BSD_HOST_OS)
- /*
- * NetBSD can just go die in a fire. Even though it claims to support
- * fallocate/posix_fallocate they don't actually *do* anything so the
- * file size remains zero. Then mmap succeeds anyway, but any access
- * to the mmap'ed region will segfault. It would be acceptable for
- * fallocate to do what it says, for mmap to fail, or for access to
- * extend the file. NetBSD managed to hit the trifecta of Getting
- * Everything Wrong, and debugging in that environment to get this far
- * has already been painful enough (systems I worked on in 1990 were
- * better that way). We'll fall through to the lseek/write method, and
- * performance will be worse, and TOO BAD.
- */
- if (sys_fallocate(obj->fd, 0, 0, obj->size) < 0)
-#endif
- {
- gf_log(this->name, GF_LOG_WARNING,
- "failed to fallocate space for log file");
- /* Have to do this the ugly page-faulty way. */
- (void)sys_lseek(obj->fd, obj->size - 1, SEEK_SET);
- (void)sys_write(obj->fd, "", 1);
- }
-
- ptr = mmap(NULL, obj->size, PROT_WRITE, MAP_SHARED, obj->fd, 0);
- if (ptr == MAP_FAILED) {
- gf_log(this->name, GF_LOG_ERROR, "failed to mmap log (%s)",
- strerror(errno));
- goto err;
- }
-
- obj->ptr = ptr;
- obj->max_offset = 0;
- return ptr;
-
-err:
- if (obj->fd >= 0) {
- sys_close(obj->fd);
- obj->fd = (-1);
- }
- if (obj->path) {
- GF_FREE(obj->path);
- obj->path = NULL;
- }
- return ptr;
-}
-
-void
-fdl_close_term_log(xlator_t *this, log_obj_t *obj)
-{
- fdl_private_t *priv = this->private;
-
- if (obj->ptr) {
- (void)munmap(obj->ptr, obj->size);
- obj->ptr = NULL;
- }
-
- if (obj->fd >= 0) {
- gf_log(this->name, GF_LOG_INFO,
- "truncating term %d %s journal to %" PRId64,
- priv->term, obj->type, obj->max_offset);
- if (sys_ftruncate(obj->fd, obj->max_offset) < 0) {
- gf_log(this->name, GF_LOG_WARNING,
- "failed to truncate journal (%s)", strerror(errno));
- }
- sys_close(obj->fd);
- obj->fd = (-1);
- }
-
- if (obj->path) {
- GF_FREE(obj->path);
- obj->path = NULL;
- }
-}
-
-gf_boolean_t
-fdl_change_term(xlator_t *this, char **meta_ptr, char **data_ptr)
-{
- fdl_private_t *priv = this->private;
-
- fdl_close_term_log(this, &priv->meta_log);
- fdl_close_term_log(this, &priv->data_log);
-
- ++(priv->term);
-
- *meta_ptr = fdl_open_term_log(this, &priv->meta_log, priv->term);
- if (!*meta_ptr) {
- return _gf_false;
- }
-
- *data_ptr = fdl_open_term_log(this, &priv->data_log, priv->term);
- if (!*data_ptr) {
- return _gf_false;
- }
-
- return _gf_true;
-}
-
-void *
-fdl_worker(void *arg)
-{
- xlator_t *this = arg;
- fdl_private_t *priv = this->private;
- call_stub_t *stub;
- char *meta_ptr = NULL;
- off_t *meta_offset = &priv->meta_log.max_offset;
- char *data_ptr = NULL;
- off_t *data_offset = &priv->data_log.max_offset;
- unsigned long base_as_ul;
- void *msync_ptr;
- size_t msync_len;
- gf_boolean_t recycle;
- void *err_label = &&err_unlocked;
-
- priv->meta_log.type = "meta";
- priv->meta_log.size = META_FILE_SIZE;
- priv->meta_log.path = NULL;
- priv->meta_log.fd = (-1);
- priv->meta_log.ptr = NULL;
-
- priv->data_log.type = "data";
- priv->data_log.size = DATA_FILE_SIZE;
- priv->data_log.path = NULL;
- priv->data_log.fd = (-1);
- priv->data_log.ptr = NULL;
-
- /* TBD: initial term should come from persistent storage (e.g. etcd) */
- priv->first_term = ++(priv->term);
- meta_ptr = fdl_open_term_log(this, &priv->meta_log, priv->term);
- if (!meta_ptr) {
- goto *err_label;
- }
- data_ptr = fdl_open_term_log(this, &priv->data_log, priv->term);
- if (!data_ptr) {
- fdl_close_term_log(this, &priv->meta_log);
- goto *err_label;
- }
-
- for (;;) {
- pthread_mutex_lock(&priv->req_lock);
- err_label = &&err_locked;
- while (list_empty(&priv->reqs)) {
- pthread_cond_wait(&priv->req_cond, &priv->req_lock);
- if (priv->should_stop) {
- goto *err_label;
- }
- if (priv->change_term) {
- if (!fdl_change_term(this, &meta_ptr, &data_ptr)) {
- goto *err_label;
- }
- priv->change_term = _gf_false;
- continue;
- }
- }
- stub = list_entry(priv->reqs.next, call_stub_t, list);
- list_del_init(&stub->list);
- pthread_mutex_unlock(&priv->req_lock);
- err_label = &&err_unlocked;
- /*
- * TBD: batch requests
- *
- * What we should do here is gather up *all* of the requests
- * that have accumulated since we were last at this point,
- * blast them all out in one big writev, and then dispatch them
- * all before coming back for more. That maximizes throughput,
- * at some cost to latency (due to queuing effects at the log
- * stage). Note that we're likely to be above io-threads, so
- * the dispatch itself will be parallelized (at further cost to
- * latency). For now, we just do the simplest thing and handle
- * one request all the way through before fetching the next.
- *
- * So, why mmap/msync instead of writev/fdatasync? Because it's
- * faster. Much faster. So much faster that I half-suspect
- * cheating, but it's more convenient for now than having to
- * ensure that everything's page-aligned for O_DIRECT (the only
- * alternative that still might avoid ridiculous levels of
- * local-FS overhead).
- *
- * TBD: check that msync really does get our data to disk.
- */
- gf_log(this->name, GF_LOG_DEBUG, "logging %u+%u bytes for op %d",
- stub->jnl_meta_len, stub->jnl_data_len, stub->fop);
- recycle = _gf_false;
- if ((*meta_offset + stub->jnl_meta_len) > priv->meta_log.size) {
- recycle = _gf_true;
- }
- if ((*data_offset + stub->jnl_data_len) > priv->data_log.size) {
- recycle = _gf_true;
- }
- if (recycle && !fdl_change_term(this, &meta_ptr, &data_ptr)) {
- goto *err_label;
- }
- meta_ptr = priv->meta_log.ptr;
- data_ptr = priv->data_log.ptr;
- gf_log(this->name, GF_LOG_DEBUG, "serializing to %p/%p",
- meta_ptr + *meta_offset, data_ptr + *data_offset);
- stub->serialize(stub, meta_ptr + *meta_offset, data_ptr + *data_offset);
- if (stub->jnl_meta_len > 0) {
- base_as_ul = (unsigned long)(meta_ptr + *meta_offset);
- msync_ptr = (void *)(base_as_ul & ~0x0fff);
- msync_len = (size_t)(base_as_ul & 0x0fff);
- if (msync(msync_ptr, msync_len + stub->jnl_meta_len, MS_SYNC) < 0) {
- gf_log(this->name, GF_LOG_WARNING,
- "failed to log request meta (%s)", strerror(errno));
- }
- *meta_offset += stub->jnl_meta_len;
- }
- if (stub->jnl_data_len > 0) {
- base_as_ul = (unsigned long)(data_ptr + *data_offset);
- msync_ptr = (void *)(base_as_ul & ~0x0fff);
- msync_len = (size_t)(base_as_ul & 0x0fff);
- if (msync(msync_ptr, msync_len + stub->jnl_data_len, MS_SYNC) < 0) {
- gf_log(this->name, GF_LOG_WARNING,
- "failed to log request data (%s)", strerror(errno));
- }
- *data_offset += stub->jnl_data_len;
- }
- call_resume(stub);
- }
-
-err_locked:
- pthread_mutex_unlock(&priv->req_lock);
-err_unlocked:
- fdl_close_term_log(this, &priv->meta_log);
- fdl_close_term_log(this, &priv->data_log);
- return NULL;
-}
-
-int32_t
-fdl_ipc_continue(call_frame_t *frame, xlator_t *this, int32_t op, dict_t *xdata)
-{
- /*
- * Nothing to be done here. Just Unwind. *
- */
- STACK_UNWIND_STRICT(ipc, frame, 0, 0, xdata);
-
- return 0;
-}
-
-int32_t
-fdl_ipc(call_frame_t *frame, xlator_t *this, int32_t op, dict_t *xdata)
-{
- call_stub_t *stub;
- fdl_private_t *priv = this->private;
- dict_t *tdict;
- int32_t gt_err = EIO;
-
- switch (op) {
- case FDL_IPC_CHANGE_TERM:
- gf_log(this->name, GF_LOG_INFO, "got CHANGE_TERM op");
- priv->change_term = _gf_true;
- pthread_cond_signal(&priv->req_cond);
- STACK_UNWIND_STRICT(ipc, frame, 0, 0, NULL);
- break;
-
- case FDL_IPC_GET_TERMS:
- gf_log(this->name, GF_LOG_INFO, "got GET_TERMS op");
- tdict = dict_new();
- if (!tdict) {
- gt_err = ENOMEM;
- goto gt_done;
- }
- if (dict_set_int32(tdict, "first", priv->first_term) != 0) {
- goto gt_done;
- }
- if (dict_set_int32(tdict, "last", priv->term) != 0) {
- goto gt_done;
- }
- gt_err = 0;
- gt_done:
- if (gt_err) {
- STACK_UNWIND_STRICT(ipc, frame, -1, gt_err, NULL);
- } else {
- STACK_UNWIND_STRICT(ipc, frame, 0, 0, tdict);
- }
- if (tdict) {
- dict_unref(tdict);
- }
- break;
-
- case FDL_IPC_JBR_SERVER_ROLLBACK:
- /*
- * In case of a rollback from jbr-server, dump *
- * the term and index number in the journal, *
- * which will later be used to rollback the fop *
- */
- stub = fop_ipc_stub(frame, fdl_ipc_continue, op, xdata);
- fdl_len_ipc(stub);
- stub->serialize = fdl_serialize_ipc;
- fdl_enqueue(this, stub);
-
- break;
-
- default:
- STACK_WIND_TAIL(frame, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->ipc, op, xdata);
- }
-
- return 0;
-}
-
-int
-fdl_init(xlator_t *this)
-{
- fdl_private_t *priv = NULL;
-
- priv = GF_CALLOC(1, sizeof(*priv), gf_fdl_mt_fdl_private_t);
- if (!priv) {
- gf_log(this->name, GF_LOG_ERROR, "failed to allocate fdl_private");
- goto err;
- }
-
- INIT_LIST_HEAD(&priv->reqs);
- if (pthread_mutex_init(&priv->req_lock, NULL) != 0) {
- gf_log(this->name, GF_LOG_ERROR, "failed to initialize req_lock");
- goto err;
- }
- if (pthread_cond_init(&priv->req_cond, NULL) != 0) {
- gf_log(this->name, GF_LOG_ERROR, "failed to initialize req_cond");
- goto err;
- }
-
- GF_OPTION_INIT("log-path", priv->log_dir, path, err);
-
- this->private = priv;
- /*
- * The rest of the fop table is automatically generated, so this is a
- * bit cleaner than messing with the generation to add a hand-written
- * exception.
- */
-
- if (gf_thread_create(&priv->worker, NULL, fdl_worker, this, "fdlwrker") !=
- 0) {
- gf_log(this->name, GF_LOG_ERROR, "failed to start fdl_worker");
- goto err;
- }
-
- return 0;
-
-err:
- if (priv) {
- GF_FREE(priv);
- }
- return -1;
-}
-
-void
-fdl_fini(xlator_t *this)
-{
- fdl_private_t *priv = this->private;
-
- if (priv) {
- priv->should_stop = _gf_true;
- pthread_cond_signal(&priv->req_cond);
- pthread_join(priv->worker, NULL);
- GF_FREE(priv);
- }
-}
-
-int
-fdl_reconfigure(xlator_t *this, dict_t *options)
-{
- fdl_private_t *priv = this->private;
-
- GF_OPTION_RECONF("log_dir", priv->log_dir, options, path, out);
- /* TBD: react if it changed */
-
-out:
- return 0;
-}
-
-int32_t
-mem_acct_init(xlator_t *this)
-{
- int ret = -1;
-
- GF_VALIDATE_OR_GOTO("fdl", this, out);
-
- ret = xlator_mem_acct_init(this, gf_fdl_mt_end + 1);
-
- if (ret != 0) {
- gf_log(this->name, GF_LOG_ERROR,
- "Memory accounting init"
- "failed");
- return ret;
- }
-out:
- return ret;
-}
-
-class_methods_t class_methods = {
- .init = fdl_init,
- .fini = fdl_fini,
- .reconfigure = fdl_reconfigure,
- .notify = default_notify,
-};
-
-struct volume_options options[] = {
- {.key = {"log-path"},
- .type = GF_OPTION_TYPE_PATH,
- .default_value = DEFAULT_LOG_FILE_DIRECTORY,
- .description = "Directory for FDL files."},
- {.key = {NULL}},
-};
-
-struct xlator_cbks cbks = {
- .release = default_release,
- .releasedir = default_releasedir,
- .forget = default_forget,
-};
diff --git a/xlators/experimental/fdl/src/fdl.h b/xlators/experimental/fdl/src/fdl.h
deleted file mode 100644
index 827db9f1246..00000000000
--- a/xlators/experimental/fdl/src/fdl.h
+++ /dev/null
@@ -1,30 +0,0 @@
-/*
- Copyright (c) 2016 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-#ifndef _FDL_H_
-#define _FDL_H_
-
-#define NEW_REQUEST (uint8_t)'N'
-
-typedef struct {
- uint8_t event_type; /* e.g. NEW_REQUEST */
- uint8_t fop_type; /* e.g. GF_FOP_SETATTR */
- uint16_t request_id;
- uint32_t ext_length;
-} event_header_t;
-
-enum {
- FDL_IPC_BASE = 0xfeedbee5, /* ... and they make honey */
- FDL_IPC_CHANGE_TERM,
- FDL_IPC_GET_TERMS,
- FDL_IPC_JBR_SERVER_ROLLBACK
-};
-
-#endif /* _FDL_H_ */
diff --git a/xlators/experimental/fdl/src/gen_dumper.py b/xlators/experimental/fdl/src/gen_dumper.py
deleted file mode 100755
index 630b54492f7..00000000000
--- a/xlators/experimental/fdl/src/gen_dumper.py
+++ /dev/null
@@ -1,117 +0,0 @@
-#!/usr/bin/python3
-
-from __future__ import print_function
-import os
-import re
-import sys
-
-curdir = os.path.dirname (sys.argv[0])
-gendir = os.path.join (curdir, '../../../../libglusterfs/src')
-sys.path.append (gendir)
-from generator import ops, fop_subs, cbk_subs, generate
-
-# See the big header comment at the start of gen_fdl.py to see how the stages
-# fit together. The big difference here is that *all* of the C code is in the
-# template file as labelled fragments, instead of as Python strings. That
-# makes it much easier to edit in one place, with proper syntax highlighting
-# and indentation.
-#
-# Stage 1 uses type-specific fragments to generate FUNCTION_BODY, instead of
-# LEN_*_TEMPLATE and SERLZ_*_TEMPLATE to generate LEN_CODE and SER_CODE.
-#
-# Stage 2 uses the FOP and CASE fragments instead of RECON_TEMPLATE and
-# FOP_TEMPLATE. The expanded FOP code (including FUNCTION_BODY substitution
-# in the middle of each function) is emitted immediately; the expanded CASE
-# code is saved for the next stage.
-#
-# Stage 3 uses the PROLOG and EPILOG fragments, with the expanded CASE code
-# in the middle of EPILOG, to generate the whole output file.
-#
-# Another way of looking at it is to consider how the fragments appear in
-# the final output:
-#
-# PROLOG
-# FOP (expanded for CREATE)
-# FOP before FUNCTION_BODY
-# LOC, INTEGER, GFID, etc. (one per arg, by type)
-# FOP after FUNCTION_BODY
-# FOP (expanded for WRITEV)
-# FOP before FUNCTION_BODY
-# GFID, VECTOR, etc. (on per arg, by type)
-# FOP after FUNCTION_BODY
-# (more FOPs)
-# EPILOG
-# EPILOG before CASE
-# CASE statements (one per fop)
-# EPILOG after CASE
-
-typemap = {
- 'dict_t *': ( "DICT", ""),
- 'fd_t *': ( "GFID", ""),
- 'dev_t': ( "DOUBLE", "%\"PRId64\" (0x%\"PRIx64\")"),
- 'gf_xattrop_flags_t': ( "INTEGER", "%d (0x%x)"),
- 'int32_t': ( "INTEGER", "%d (0x%x)"),
- 'mode_t': ( "INTEGER", "%d (0x%x)"),
- 'off_t': ( "DOUBLE", "%\"PRId64\" (0x%\"PRIx64\")"),
- 'size_t': ( "DOUBLE", "%\"PRId64\" (0x%\"PRIx64\")"),
- 'uint32_t': ( "INTEGER", "%d (0x%x)"),
- 'loc_t *': ( "LOC", ""),
- 'const char *': ( "STRING", ""),
- 'struct iovec *': ( "VECTOR", ""),
- 'struct iatt *': ( "IATT", ""),
-}
-
-def get_special_subs (args):
- code = ""
- for arg in args:
- if (arg[0] != 'fop-arg') or (len(arg) < 4):
- continue
- recon_type, recon_fmt = typemap[arg[2]]
- code += fragments[recon_type].replace("@ARGNAME@", arg[3]) \
- .replace("@FORMAT@", recon_fmt)
- return code
-
-def gen_functions ():
- code = ""
- for name, value in ops.items():
- if "journal" not in [ x[0] for x in value ]:
- continue
- fop_subs[name]["@FUNCTION_BODY@"] = get_special_subs(value)
- # Print the FOP fragment with @FUNCTION_BODY@ in the middle.
- code += generate(fragments["FOP"], name, fop_subs)
- return code
-
-def gen_cases ():
- code = ""
- for name, value in ops.items():
- if "journal" not in [ x[0] for x in value ]:
- continue
- # Add the CASE fragment for this fop.
- code += generate(fragments["CASE"], name, fop_subs)
- return code
-
-def load_fragments (path="recon-tmpl.c"):
- pragma_re = re.compile('pragma fragment (.*)')
- cur_symbol = None
- cur_value = ""
- result = {}
- for line in open(path, "r").readlines():
- m = pragma_re.search(line)
- if m:
- if cur_symbol:
- result[cur_symbol] = cur_value
- cur_symbol = m.group(1)
- cur_value = ""
- else:
- cur_value += line
- if cur_symbol:
- result[cur_symbol] = cur_value
- return result
-
-if __name__ == "__main__":
- fragments = load_fragments(sys.argv[1])
- print("/* BEGIN GENERATED CODE - DO NOT MODIFY */")
- print(fragments["PROLOG"])
- print(gen_functions())
- print(fragments["EPILOG"].replace("@SWITCH_BODY@", gen_cases()))
- print("/* END GENERATED CODE */")
diff --git a/xlators/experimental/fdl/src/gen_fdl.py b/xlators/experimental/fdl/src/gen_fdl.py
deleted file mode 100755
index 467ec8927b7..00000000000
--- a/xlators/experimental/fdl/src/gen_fdl.py
+++ /dev/null
@@ -1,354 +0,0 @@
-#!/usr/bin/python3
-
-from __future__ import print_function
-import os
-import sys
-
-curdir = os.path.dirname (sys.argv[0])
-gendir = os.path.join (curdir, '../../../../libglusterfs/src')
-sys.path.append (gendir)
-from generator import ops, fop_subs, cbk_subs, generate
-
-# Generation occurs in three stages. In this case, it actually makes more
-# sense to discuss them in the *opposite* order of that in which they
-# actually happen.
-#
-# Stage 3 is to insert all of the generated code into a file, replacing the
-# "#pragma generate" that's already there. The file can thus contain all
-# sorts of stuff that's not specific to one fop, either before or after the
-# generated code as appropriate.
-#
-# Stage 2 is to generate all of the code *for a particular fop*, using a
-# string-valued template plus a table of substitution values. Most of these
-# are built in to the generator itself. However, we also add a couple that
-# are specific to this particular translator - LEN_CODE and SER_CODE. These
-# are per-fop functions to get the length or the contents (respectively) of
-# what we'll put in the log. As with stage 3 allowing per-file boilerplate
-# before and after generated code, this allows per-fop boilerplate before and
-# after generated code.
-#
-# Stage 1, therefore, is to create the LEN_CODE and SER_CODE substitutions for
-# each fop, and put them in the same table where e.g. NAME and SHORT_ARGS
-# already are. We do this by looking at the fop-description table in the
-# generator module, then doing out own template substitution to plug each
-# specific argument name into another string-valued template.
-#
-# So, what does this leave us with in terms of variables and files?
-#
-# For stage 1, we have a series of LEN_*_TEMPLATE and SERLZ_*_TEMPLATE
-# strings, which are used to generate the length and serialization code for
-# each argument type.
-#
-# For stage 2, we have a bunch of *_TEMPLATE strings (no LEN_ or SERLZ_
-# prefix), which are used (along with the output from stage 1) to generate
-# whole functions.
-#
-# For stage 3, we have a whole separate file (fdl_tmpl.c) into which we insert
-# the collection of all functions defined in stage 2.
-
-
-LEN_TEMPLATE = """
-void
-fdl_len_@NAME@ (call_stub_t *stub)
-{
- uint32_t meta_len = sizeof (event_header_t);
- uint32_t data_len = 0;
-
- /* TBD: global stuff, e.g. uid/gid */
-@LEN_CODE@
-
- /* TBD: pad extension length */
- stub->jnl_meta_len = meta_len;
- stub->jnl_data_len = data_len;
-}
-"""
-
-SER_TEMPLATE = """
-void
-fdl_serialize_@NAME@ (call_stub_t *stub, char *meta_buf, char *data_buf)
-{
- event_header_t *eh;
- unsigned long offset = 0;
-
- /* TBD: word size/endianness */
- eh = (event_header_t *)meta_buf;
- eh->event_type = NEW_REQUEST;
- eh->fop_type = GF_FOP_@UPNAME@;
- eh->request_id = 0; // TBD
- meta_buf += sizeof (*eh);
-@SER_CODE@
- /* TBD: pad extension length */
- eh->ext_length = offset;
-}
-"""
-
-CBK_TEMPLATE = """
-int32_t
-fdl_@NAME@_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- @LONG_ARGS@)
-{
- STACK_UNWIND_STRICT (@NAME@, frame, op_ret, op_errno,
- @SHORT_ARGS@);
- return 0;
-}
-"""
-
-CONTINUE_TEMPLATE = """
-int32_t
-fdl_@NAME@_continue (call_frame_t *frame, xlator_t *this,
- @LONG_ARGS@)
-{
- STACK_WIND (frame, fdl_@NAME@_cbk,
- FIRST_CHILD(this), FIRST_CHILD(this)->fops->@NAME@,
- @SHORT_ARGS@);
- return 0;
-}
-
-"""
-
-FOP_TEMPLATE = """
-int32_t
-fdl_@NAME@ (call_frame_t *frame, xlator_t *this,
- @LONG_ARGS@)
-{
- call_stub_t *stub;
-
- stub = fop_@NAME@_stub (frame, default_@NAME@,
- @SHORT_ARGS@);
- fdl_len_@NAME@ (stub);
- stub->serialize = fdl_serialize_@NAME@;
- fdl_enqueue (this, stub);
-
- return 0;
-}
-"""
-
-LEN_DICT_TEMPLATE = """
- if (@SRC@) {
- data_pair_t *memb;
- for (memb = @SRC@->members_list; memb; memb = memb->next) {
- meta_len += sizeof(int);
- meta_len += strlen(memb->key) + 1;
- meta_len += sizeof(int);
- meta_len += memb->value->len;
- }
- }
- meta_len += sizeof(int);
-"""
-
-LEN_GFID_TEMPLATE = """
- meta_len += 16;
-"""
-
-LEN_INTEGER_TEMPLATE = """
- meta_len += sizeof (@SRC@);
-"""
-
-# 16 for gfid, 16 for pargfid, 1 for flag, 0/1 for terminating NUL
-LEN_LOC_TEMPLATE = """
- if (@SRC@.name) {
- meta_len += (strlen (@SRC@.name) + 34);
- } else {
- meta_len += 33;
- }
-"""
-
-LEN_STRING_TEMPLATE = """
- if (@SRC@) {
- meta_len += (strlen (@SRC@) + 1);
- } else {
- meta_len += 1;
- }
-"""
-
-LEN_VECTOR_TEMPLATE = """
- meta_len += sizeof(size_t);
- data_len += iov_length (@VEC@, @CNT@);
-"""
-
-LEN_IATT_TEMPLATE = """
- meta_len += sizeof(@SRC@.ia_prot);
- meta_len += sizeof(@SRC@.ia_uid);
- meta_len += sizeof(@SRC@.ia_gid);
- meta_len += sizeof(@SRC@.ia_atime);
- meta_len += sizeof(@SRC@.ia_atime_nsec);
- meta_len += sizeof(@SRC@.ia_mtime);
- meta_len += sizeof(@SRC@.ia_mtime_nsec);
-"""
-
-SERLZ_DICT_TEMPLATE = """
- if (@SRC@) {
- data_pair_t *memb;
- for (memb = @SRC@->members_list; memb; memb = memb->next) {
- *((int *)(meta_buf+offset)) = strlen(memb->key) + 1;
- offset += sizeof(int);
- strcpy (meta_buf+offset, memb->key);
- offset += strlen(memb->key) + 1;
- *((int *)(meta_buf+offset)) = memb->value->len;
- offset += sizeof(int);
- memcpy (meta_buf+offset, memb->value->data, memb->value->len);
- offset += memb->value->len;
- }
- }
- *((int *)(meta_buf+offset)) = 0;
- offset += sizeof(int);
-"""
-
-SERLZ_GFID_TEMPLATE = """
- memcpy (meta_buf+offset, @SRC@->inode->gfid, 16);
- offset += 16;
-"""
-
-SERLZ_INTEGER_TEMPLATE = """
- memcpy (meta_buf+offset, &@SRC@, sizeof(@SRC@));
- offset += sizeof(@SRC@);
-"""
-
-SERLZ_LOC_TEMPLATE = """
- memcpy (meta_buf+offset, @SRC@.gfid, 16);
- offset += 16;
- memcpy (meta_buf+offset, @SRC@.pargfid, 16);
- offset += 16;
- if (@SRC@.name) {
- *(meta_buf+offset) = 1;
- ++offset;
- strcpy (meta_buf+offset, @SRC@.name);
- offset += (strlen (@SRC@.name) + 1);
- } else {
- *(meta_buf+offset) = 0;
- ++offset;
- }
-"""
-
-SERLZ_STRING_TEMPLATE = """
- if (@SRC@) {
- *(meta_buf+offset) = 1;
- ++offset;
- strcpy (meta_buf+offset, @SRC@);
- offset += strlen(@SRC@);
- } else {
- *(meta_buf+offset) = 0;
- ++offset;
- }
-"""
-
-SERLZ_VECTOR_TEMPLATE = """
- *((size_t *)(meta_buf+offset)) = iov_length (@VEC@, @CNT@);
- offset += sizeof(size_t);
- int32_t i;
- for (i = 0; i < @CNT@; ++i) {
- memcpy (data_buf, @VEC@[i].iov_base, @VEC@[i].iov_len);
- data_buf += @VEC@[i].iov_len;
- }
-"""
-
-# We don't need to save all of the fields - only those affected by chown,
-# chgrp, chmod, and utime.
-SERLZ_IATT_TEMPLATE = """
- *((ia_prot_t *)(meta_buf+offset)) = @SRC@.ia_prot;
- offset += sizeof(@SRC@.ia_prot);
- *((uint32_t *)(meta_buf+offset)) = @SRC@.ia_uid;
- offset += sizeof(@SRC@.ia_uid);
- *((uint32_t *)(meta_buf+offset)) = @SRC@.ia_gid;
- offset += sizeof(@SRC@.ia_gid);
- *((uint32_t *)(meta_buf+offset)) = @SRC@.ia_atime;
- offset += sizeof(@SRC@.ia_atime);
- *((uint32_t *)(meta_buf+offset)) = @SRC@.ia_atime_nsec;
- offset += sizeof(@SRC@.ia_atime_nsec);
- *((uint32_t *)(meta_buf+offset)) = @SRC@.ia_mtime;
- offset += sizeof(@SRC@.ia_mtime);
- *((uint32_t *)(meta_buf+offset)) = @SRC@.ia_mtime_nsec;
- offset += sizeof(@SRC@.ia_mtime_nsec);
-"""
-
-typemap = {
- 'dict_t *': ( LEN_DICT_TEMPLATE, SERLZ_DICT_TEMPLATE),
- 'fd_t *': ( LEN_GFID_TEMPLATE, SERLZ_GFID_TEMPLATE),
- 'dev_t': ( LEN_INTEGER_TEMPLATE, SERLZ_INTEGER_TEMPLATE),
- 'gf_xattrop_flags_t': ( LEN_INTEGER_TEMPLATE, SERLZ_INTEGER_TEMPLATE),
- 'int32_t': ( LEN_INTEGER_TEMPLATE, SERLZ_INTEGER_TEMPLATE),
- 'mode_t': ( LEN_INTEGER_TEMPLATE, SERLZ_INTEGER_TEMPLATE),
- 'off_t': ( LEN_INTEGER_TEMPLATE, SERLZ_INTEGER_TEMPLATE),
- 'size_t': ( LEN_INTEGER_TEMPLATE, SERLZ_INTEGER_TEMPLATE),
- 'uint32_t': ( LEN_INTEGER_TEMPLATE, SERLZ_INTEGER_TEMPLATE),
- 'loc_t *': ( LEN_LOC_TEMPLATE, SERLZ_LOC_TEMPLATE),
- 'const char *': ( LEN_STRING_TEMPLATE, SERLZ_STRING_TEMPLATE),
- 'struct iatt *': ( LEN_IATT_TEMPLATE, SERLZ_IATT_TEMPLATE),
-}
-
-def get_special_subs (args):
- len_code = ""
- ser_code = ""
- for arg in args:
- if (arg[0] != 'fop-arg') or (len(arg) < 4):
- continue
- # Let this throw an exception if we get an unknown field name. The
- # broken build will remind whoever messed with the stub code that a
- # corresponding update is needed here.
- if arg[3] == "vector":
- # Make it as obvious as possible that this is a special case.
- len_code += LEN_VECTOR_TEMPLATE \
- .replace("@VEC@", "stub->args.vector") \
- .replace("@CNT@", "stub->args.count")
- ser_code += SERLZ_VECTOR_TEMPLATE \
- .replace("@VEC@", "stub->args.vector") \
- .replace("@CNT@", "stub->args.count")
- else:
- len_tmpl, ser_tmpl = typemap[arg[2]]
- src = "stub->args.%s" % arg[3]
- len_code += len_tmpl.replace("@SRC@", src)
- ser_code += ser_tmpl.replace("@SRC@", src)
- return len_code, ser_code
-
-# Mention those fops in the selective_generate table, for which
-# only a few common functions will be generated, and mention those
-# functions. Rest of the functions can be customized
-selective_generate = {
- "ipc": "len,serialize",
- }
-
-def gen_fdl ():
- entrypoints = []
- for name, value in ops.items():
- if "journal" not in [ x[0] for x in value ]:
- continue
-
- # generate all functions for all the fops
- # except for the ones in selective_generate for which
- # generate only the functions mentioned in the
- # selective_generate table
- gen_funcs = "len,serialize,callback,continue,fop"
- if name in selective_generate:
- gen_funcs = selective_generate[name].split(",")
-
- len_code, ser_code = get_special_subs(value)
- fop_subs[name]["@LEN_CODE@"] = len_code[:-1]
- fop_subs[name]["@SER_CODE@"] = ser_code[:-1]
- if 'len' in gen_funcs:
- print(generate(LEN_TEMPLATE, name, fop_subs))
- if 'serialize' in gen_funcs:
- print(generate(SER_TEMPLATE, name, fop_subs))
- if name == 'writev':
- print("#define DESTAGE_ASYNC")
- if 'callback' in gen_funcs:
- print(generate(CBK_TEMPLATE, name, cbk_subs))
- if 'continue' in gen_funcs:
- print(generate(CONTINUE_TEMPLATE, name, fop_subs))
- if 'fop' in gen_funcs:
- print(generate(FOP_TEMPLATE, name, fop_subs))
- if name == 'writev':
- print("#undef DESTAGE_ASYNC")
- entrypoints.append(name)
- print("struct xlator_fops fops = {")
- for ep in entrypoints:
- print("\t.%s = fdl_%s," % (ep, ep))
- print("};")
-
-for l in open(sys.argv[1], 'r').readlines():
- if l.find('#pragma generate') != -1:
- print("/* BEGIN GENERATED CODE - DO NOT MODIFY */")
- gen_fdl()
- print("/* END GENERATED CODE */")
- else:
- print(l[:-1])
diff --git a/xlators/experimental/fdl/src/gen_recon.py b/xlators/experimental/fdl/src/gen_recon.py
deleted file mode 100755
index 0766f61320a..00000000000
--- a/xlators/experimental/fdl/src/gen_recon.py
+++ /dev/null
@@ -1,218 +0,0 @@
-#!/usr/bin/python3
-
-from __future__ import print_function
-import os
-import re
-import string
-import sys
-
-curdir = os.path.dirname (sys.argv[0])
-gendir = os.path.join (curdir, '../../../../libglusterfs/src')
-sys.path.append (gendir)
-from generator import ops, fop_subs, cbk_subs, generate
-
-# See the big header comment at the start of gen_fdl.py to see how the stages
-# fit together. The big difference here is that *all* of the C code is in the
-# template file as labelled fragments, instead of as Python strings. That
-# makes it much easier to edit in one place, with proper syntax highlighting
-# and indentation.
-#
-# Stage 1 uses type-specific fragments to generate FUNCTION_BODY, instead of
-# LEN_*_TEMPLATE and SERLZ_*_TEMPLATE to generate LEN_CODE and SER_CODE.
-#
-# Stage 2 uses the FOP and CASE fragments instead of RECON_TEMPLATE and
-# FOP_TEMPLATE. The expanded FOP code (including FUNCTION_BODY substitution
-# in the middle of each function) is emitted immediately; the expanded CASE
-# code is saved for the next stage.
-#
-# Stage 3 uses the PROLOG and EPILOG fragments, with the expanded CASE code
-# in the middle of EPILOG, to generate the whole output file.
-#
-# Another way of looking at it is to consider how the fragments appear in
-# the final output:
-#
-# PROLOG
-# FOP (expanded for CREATE)
-# FOP before FUNCTION_BODY
-# LOC, INTEGER, GFID, etc. (one per arg, by type)
-# FOP after FUNCTION_BODY
-# FOP (expanded for WRITEV)
-# FOP before FUNCTION_BODY
-# GFID, VECTOR, etc. (one per arg, by type)
-# FOP after FUNCTION_BODY
-# (more FOPs)
-# EPILOG
-# EPILOG before CASE
-# CASE statements (one per fop)
-# EPILOG after CASE
-
-typemap = {
- 'dict_t *': "DICT",
- 'fd_t *': "FD",
- 'dev_t': "DOUBLE",
- 'gf_xattrop_flags_t': "INTEGER",
- 'int32_t': "INTEGER",
- 'mode_t': "INTEGER",
- 'off_t': "DOUBLE",
- 'size_t': "DOUBLE",
- 'uint32_t': "INTEGER",
- 'loc_t *': "LOC",
- 'const char *': "STRING",
- 'struct iovec *': "VECTOR",
- 'struct iatt *': "IATT",
- 'struct iobref *': "IOBREF",
-}
-
-def get_special_subs (name, args, fop_type):
- code = ""
- cleanups = ""
- links = ""
- s_args = []
- for arg in args:
- if arg[0] == 'extra':
- code += "\t%s %s;\n\n" % (arg[2], arg[1])
- s_args.append(arg[3])
- continue
- if arg[0] == 'link':
- links += fragments["LINK"].replace("@INODE_ARG@", arg[1]) \
- .replace("@IATT_ARG@", arg[2])
- continue
- if arg[0] != 'fop-arg':
- continue
- if (name, arg[1]) == ('writev', 'count'):
- # Special case: just skip this. We can't mark it as 'nosync'
- # because of the way the translator and dumper generators look for
- # that after 'stub-name' which we don't define. Instead of adding a
- # bunch of generic infrastructure for this one case, just pound it
- # here.
- continue
- recon_type = typemap[arg[2]]
- # print "/* %s.%s => %s (%s)*/" % (name, arg[1], recon_type, fop_type)
- if (name == "create") and (arg[1] == "fd"):
- # Special case: fd for create is new, not looked up.
- # print "/* change to NEW_FD */"
- recon_type = "NEW_FD"
- elif (recon_type == "LOC") and (fop_type == "entry-op"):
- # Need to treat this differently for inode vs. entry ops.
- # Special case: link source is treated like inode-op.
- if (name != "link") or (arg[1] != "oldloc"):
- # print "/* change to PARENT_LOC */"
- recon_type = "PARENT_LOC"
- code += fragments[recon_type].replace("@ARGNAME@", arg[1]) \
- .replace("@ARGTYPE@", arg[2])
- cleanup_key = recon_type + "_CLEANUP"
- if cleanup_key in fragments:
- new_frag = fragments[cleanup_key].replace("@ARGNAME@", arg[1])
- # Make sure these get added in *reverse* order. Otherwise, a
- # failure for an earlier argument might goto a label that falls
- # through to the cleanup code for a variable associated with a
- # later argument, but that variable might not even have been
- # *declared* (let alone initialized) yet. Consider the following
- # case.
- #
- # process argument A (on failure goto cleanup_A)
- # set error label to cleanup_A
- #
- # declare pointer variable for argument B
- # process argument B (on failure goto cleanup_B)
- #
- # cleanup_A:
- # /* whatever */
- # cleanup_B:
- # free pointer variable <= "USED BUT NOT SET" error here
- #
- # By adding these in reverse order, we ensure that cleanup_B is
- # actually *before* cleanup_A, and nothing will try to do the free
- # until we've actually attempted processing of B.
- cleanups = new_frag + cleanups
- if 'nosync' in arg[4:]:
- code += "\t(void)%s;\n" % arg[1];
- continue
- if arg[2] in ("loc_t *", "struct iatt *"):
- # These are passed as pointers to the syncop, but they're actual
- # structures in the generated code.
- s_args.append("&"+arg[1]);
- else:
- s_args.append(arg[1])
- # We have to handle a couple of special cases here, because some n00b
- # defined the syncops with a different argument order than the fops they're
- # based on.
- if name == 'writev':
- # Swap 'flags' and 'iobref'. Also, we need to add the iov count, which
- # is not stored in or read from the journal. There are other ways to
- # do that, but this is the only place we need anything similar and we
- # already have to treat it as a special case so this is simplest.
- s_args_str = 'fd, &vector, 1, off, iobref, flags, &preop, &postop, xdata'
- elif name == 'symlink':
- # Swap 'linkpath' and 'loc'.
- s_args_str = '&loc, linkpath, &iatt, xdata'
- elif name == 'xattrop':
- s_args_str = '&loc, flags, dict, xdata, NULL'
- elif name == 'fxattrop':
- s_args_str = 'fd, flags, dict, xdata, NULL'
- else:
- s_args_str = ', '.join(s_args)
- return code, links, s_args_str, cleanups
-
-# TBD: probably need to generate type-specific cleanup code as well - e.g.
-# fd_unref for an fd_t, loc_wipe for a loc_t, and so on. All of these
-# generated CLEANUP fragments will go at the end of the function, with goto
-# labels. Meanwhile, the error-checking part of each type-specific fragment
-# (e.g. LOC or FD) will need to update the indirect label that we jump to when
-# an error is detected. This will probably get messy.
-def gen_functions ():
- code = ""
- for name, value in ops.items():
- fop_type = [ x[1] for x in value if x[0] == "journal" ]
- if not fop_type:
- continue
- body, links, syncop_args, cleanups = get_special_subs (name, value,
- fop_type[0])
- fop_subs[name]["@FUNCTION_BODY@"] = body
- fop_subs[name]["@LINKS@"] = links
- fop_subs[name]["@SYNCOP_ARGS@"] = syncop_args
- fop_subs[name]["@CLEANUPS@"] = cleanups
- if name == "writev":
- # Take advantage of the fact that, *during reconciliation*, the
- # vector is always a single element. In normal I/O it's not.
- fop_subs[name]["@SUCCESS_VALUE@"] = "vector.iov_len"
- else:
- fop_subs[name]["@SUCCESS_VALUE@"] = "GFAPI_SUCCESS"
- # Print the FOP fragment with @FUNCTION_BODY@ in the middle.
- code += generate(fragments["FOP"], name, fop_subs)
- return code
-
-def gen_cases ():
- code = ""
- for name, value in ops.items():
- if "journal" not in [ x[0] for x in value ]:
- continue
- # Add the CASE fragment for this fop.
- code += generate(fragments["CASE"], name, fop_subs)
- return code
-
-def load_fragments (path="recon-tmpl.c"):
- pragma_re = re.compile('pragma fragment (.*)')
- cur_symbol = None
- cur_value = ""
- result = {}
- for line in open(path, "r").readlines():
- m = pragma_re.search(line)
- if m:
- if cur_symbol:
- result[cur_symbol] = cur_value
- cur_symbol = m.group(1)
- cur_value = ""
- else:
- cur_value += line
- if cur_symbol:
- result[cur_symbol] = cur_value
- return result
-
-if __name__ == "__main__":
- fragments = load_fragments(sys.argv[1])
- print("/* BEGIN GENERATED CODE - DO NOT MODIFY */")
- print(fragments["PROLOG"])
- print(gen_functions())
- print(fragments["EPILOG"].replace("@SWITCH_BODY@", gen_cases()))
- print("/* END GENERATED CODE */")
diff --git a/xlators/experimental/fdl/src/logdump.c b/xlators/experimental/fdl/src/logdump.c
deleted file mode 100644
index 6fbc5218d47..00000000000
--- a/xlators/experimental/fdl/src/logdump.c
+++ /dev/null
@@ -1,51 +0,0 @@
-#include <fcntl.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <unistd.h>
-#include <sys/mman.h>
-
-extern int
-fdl_dump(char **, char **);
-
-int
-main(int argc, char **argv)
-{
- int meta_fd = (-1);
- char *meta_buf = NULL;
- int data_fd = (-1);
- char *data_buf = NULL;
-
- meta_fd = open(argv[1], O_RDONLY);
- if (meta_fd < 0) {
- perror("open");
- return EXIT_FAILURE;
- }
-
- /* TBD: get proper length */
- meta_buf = mmap(NULL, 1048576, PROT_READ, MAP_PRIVATE, meta_fd, 0);
- if (meta_buf == MAP_FAILED) {
- perror("mmap");
- return EXIT_FAILURE;
- }
-
- data_fd = open(argv[2], O_RDONLY);
- if (data_fd < 0) {
- perror("open");
- return EXIT_FAILURE;
- }
-
- /* TBD: get proper length */
- data_buf = mmap(NULL, 1048576, PROT_READ, MAP_PRIVATE, data_fd, 0);
- if (data_buf == MAP_FAILED) {
- perror("mmap");
- return EXIT_FAILURE;
- }
-
- for (;;) {
- if (!fdl_dump(&meta_buf, &data_buf)) {
- break;
- }
- }
-
- return EXIT_SUCCESS;
-}
diff --git a/xlators/experimental/fdl/src/recon-tmpl.c.in b/xlators/experimental/fdl/src/recon-tmpl.c.in
deleted file mode 100644
index 5115dfd5c75..00000000000
--- a/xlators/experimental/fdl/src/recon-tmpl.c.in
+++ /dev/null
@@ -1,297 +0,0 @@
-#pragma fragment PROLOG
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include "glusterfs.h"
-#include "iatt.h"
-#include "syncop.h"
-#include "xlator.h"
-#include "glfs-internal.h"
-
-#include "fdl.h"
-
-#define GFAPI_SUCCESS 0
-
-inode_t *
-recon_get_inode(glfs_t *fs, uuid_t gfid)
-{
- inode_t *inode;
- loc_t loc = {
- NULL,
- };
- struct iatt iatt;
- int ret;
- inode_t *newinode;
-
- inode = inode_find(fs->active_subvol->itable, gfid);
- if (inode) {
- printf("=== FOUND %s IN TABLE\n", uuid_utoa(gfid));
- return inode;
- }
-
- loc.inode = inode_new(fs->active_subvol->itable);
- if (!loc.inode) {
- return NULL;
- }
- gf_uuid_copy(loc.inode->gfid, gfid);
- gf_uuid_copy(loc.gfid, gfid);
-
- printf("=== DOING LOOKUP FOR %s\n", uuid_utoa(gfid));
-
- ret = syncop_lookup(fs->active_subvol, &loc, &iatt, NULL, NULL, NULL);
- if (ret != GFAPI_SUCCESS) {
- fprintf(stderr, "syncop_lookup failed (%d)\n", ret);
- return NULL;
- }
-
- newinode = inode_link(loc.inode, NULL, NULL, &iatt);
- if (newinode) {
- inode_lookup(newinode);
- }
-
- return newinode;
-}
-
-#pragma fragment DICT
-dict_t *@ARGNAME@;
-
-@ARGNAME@ = dict_new();
-if (!@ARGNAME@) {
- goto *err_label;
-}
-err_label = &&cleanup_@ARGNAME@;
-
-{
- int key_len, data_len;
- char *key_ptr;
- int garbage;
- for (;;) {
- key_len = *((int *)new_meta);
- new_meta += sizeof(int);
- if (!key_len) {
- break;
- }
- key_ptr = new_meta;
- new_meta += key_len;
- data_len = *((int *)new_meta);
- new_meta += sizeof(int);
- garbage = dict_set_static_bin(@ARGNAME@, key_ptr, new_meta, data_len);
- /* TBD: check error from dict_set_static_bin */
- (void)garbage;
- new_meta += data_len;
- }
-}
-
-#pragma fragment DICT_CLEANUP
-cleanup_@ARGNAME@ : dict_unref(@ARGNAME@);
-
-#pragma fragment DOUBLE
-@ARGTYPE@ @ARGNAME@ = *((@ARGTYPE@ *)new_meta);
-new_meta += sizeof(uint64_t);
-
-#pragma fragment FD
-inode_t *@ARGNAME@_ino;
-fd_t *@ARGNAME@;
-
-@ARGNAME@_ino = recon_get_inode(fs, *((uuid_t *)new_meta));
-new_meta += 16;
-if (!@ARGNAME@_ino) {
- goto *err_label;
-}
-err_label = &&cleanup_@ARGNAME@_ino;
-
-@ARGNAME@ = fd_anonymous(@ARGNAME@_ino);
-if (!@ARGNAME@) {
- goto *err_label;
-}
-err_label = &&cleanup_@ARGNAME@;
-
-#pragma fragment FD_CLEANUP
-cleanup_@ARGNAME@ : fd_unref(@ARGNAME@);
-cleanup_@ARGNAME@_ino : inode_unref(@ARGNAME@_ino);
-
-#pragma fragment NEW_FD
-/*
- * This pseudo-type is only used for create, and in that case we know
- * we'll be using loc.inode, so it's not worth generalizing to take an
- * extra argument.
- */
-fd_t *@ARGNAME@ = fd_anonymous(loc.inode);
-
-if (!fd) {
- goto *err_label;
-}
-err_label = &&cleanup_@ARGNAME@;
-new_meta += 16;
-
-#pragma fragment NEW_FD_CLEANUP
-cleanup_@ARGNAME@ : fd_unref(@ARGNAME@);
-
-#pragma fragment INTEGER
-@ARGTYPE@ @ARGNAME@ = *((@ARGTYPE@ *)new_meta);
-
-new_meta += sizeof(@ARGTYPE@);
-
-#pragma fragment LOC
-loc_t @ARGNAME@ = {
- NULL,
-};
-
-@ARGNAME@.inode = recon_get_inode(fs, *((uuid_t *)new_meta));
-if (!@ARGNAME@.inode) {
- goto *err_label;
-}
-err_label = &&cleanup_@ARGNAME@;
-gf_uuid_copy(@ARGNAME@.gfid, @ARGNAME@.inode->gfid);
-new_meta += 16;
-new_meta += 16; /* skip over pargfid */
-if (*(new_meta++)) {
- @ARGNAME@.name = new_meta;
- new_meta += strlen(new_meta) + 1;
-}
-
-#pragma fragment LOC_CLEANUP
-cleanup_@ARGNAME@ : loc_wipe(&@ARGNAME@);
-
-#pragma fragment PARENT_LOC
-loc_t @ARGNAME@ = {
- NULL,
-};
-
-new_meta += 16; /* skip over gfid */
-@ARGNAME@.parent = recon_get_inode(fs, *((uuid_t *)new_meta));
-if (!@ARGNAME@.parent) {
- goto *err_label;
-}
-err_label = &&cleanup_@ARGNAME@;
-gf_uuid_copy(@ARGNAME@.pargfid, @ARGNAME@.parent->gfid);
-new_meta += 16;
-if (!*(new_meta++)) {
- goto *err_label;
-}
-@ARGNAME@.name = new_meta;
-new_meta += strlen(new_meta) + 1;
-
-@ARGNAME@.inode = inode_new(fs->active_subvol->itable);
-if (!@ARGNAME@.inode) {
- goto *err_label;
-}
-
-#pragma fragment PARENT_LOC_CLEANUP
-cleanup_@ARGNAME@ : loc_wipe(&@ARGNAME@);
-
-#pragma fragment STRING
-char *@ARGNAME@;
-if (*(new_meta++)) {
- @ARGNAME@ = new_meta;
- new_meta += (strlen(new_meta) + 1);
-} else {
- goto *err_label;
-}
-
-#pragma fragment VECTOR
-struct iovec @ARGNAME@;
-
-@ARGNAME@.iov_len = *((size_t *)new_meta);
-new_meta += sizeof(@ARGNAME@.iov_len);
-@ARGNAME@.iov_base = new_data;
-new_data += @ARGNAME@.iov_len;
-
-#pragma fragment IATT
-struct iatt @ARGNAME@;
-{
- @ARGNAME@.ia_prot = *((ia_prot_t *)new_meta);
- new_meta += sizeof(ia_prot_t);
- uint32_t *myints = (uint32_t *)new_meta;
- @ARGNAME@.ia_uid = myints[0];
- @ARGNAME@.ia_gid = myints[1];
- @ARGNAME@.ia_atime = myints[2];
- @ARGNAME@.ia_atime_nsec = myints[3];
- @ARGNAME@.ia_mtime = myints[4];
- @ARGNAME@.ia_mtime_nsec = myints[5];
- new_meta += sizeof(*myints) * 6;
-}
-
-#pragma fragment IOBREF
-struct iobref *@ARGNAME@;
-
-@ARGNAME@ = iobref_new();
-if (!@ARGNAME@) {
- goto *err_label;
-}
-err_label = &&cleanup_@ARGNAME@;
-
-#pragma fragment IOBREF_CLEANUP
-cleanup_@ARGNAME@ : iobref_unref(@ARGNAME@);
-
-#pragma fragment LINK
-/* TBD: check error */
-inode_t *new_inode = inode_link(@INODE_ARG@, NULL, NULL, @IATT_ARG@);
-if (new_inode) {
- inode_lookup(new_inode);
-}
-
-#pragma fragment FOP
-int fdl_replay_@NAME@(glfs_t *fs, char **old_meta, char **old_data)
-{
- char *new_meta = *old_meta;
- char *new_data = *old_data;
- int ret;
- int status = 0xbad;
- void *err_label = &&done;
-
- @FUNCTION_BODY@
-
- ret = syncop_@NAME@(fs->active_subvol, @SYNCOP_ARGS@, NULL);
- if (ret !=@SUCCESS_VALUE@) {
- fprintf(stderr, "syncop_@NAME@ returned %d", ret);
- goto *err_label;
- }
-
- @LINKS@
-
- status = 0;
-
- @CLEANUPS@
-
- done : *old_meta = new_meta;
- *old_data = new_data;
- return status;
-}
-
-#pragma fragment CASE
-case GF_FOP_@UPNAME@:
- printf("=== GF_FOP_@UPNAME@\n");
- if (fdl_replay_@NAME@(fs, &new_meta, &new_data) != 0) {
- goto done;
- }
- recognized = 1;
- break;
-
-#pragma fragment EPILOG
- int
- recon_execute(glfs_t *fs, char **old_meta, char **old_data)
- {
- char *new_meta = *old_meta;
- char *new_data = *old_data;
- int recognized = 0;
- event_header_t *eh;
-
- eh = (event_header_t *)new_meta;
- new_meta += sizeof(*eh);
-
- /* TBD: check event_type instead of assuming NEW_REQUEST */
-
- switch (eh->fop_type) {
- @SWITCH_BODY@
-
- default : printf("unknown fop %u\n", eh->fop_type);
- }
-
- done:
- *old_meta = new_meta;
- *old_data = new_data;
- return recognized;
- }
diff --git a/xlators/experimental/fdl/src/recon.c b/xlators/experimental/fdl/src/recon.c
deleted file mode 100644
index ec1bf37dad9..00000000000
--- a/xlators/experimental/fdl/src/recon.c
+++ /dev/null
@@ -1,89 +0,0 @@
-#include <fcntl.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <unistd.h>
-#include <sys/mman.h>
-
-#include "glusterfs.h"
-#include "fd.h"
-#include "syncop.h"
-#include "glfs-internal.h"
-
-#define GFAPI_SUCCESS 0
-
-extern int
-recon_execute(glfs_t *, char **, char **);
-
-int
-main(int argc, char **argv)
-{
- glfs_t *fs;
- int ret;
- int meta_fd = (-1);
- char *meta_buf = NULL;
- int data_fd = (-1);
- char *data_buf = NULL;
-
- fs = glfs_new("whocares");
- if (!fs) {
- fprintf(stderr, "glfs_new failed\n");
- return EXIT_FAILURE;
- }
-
- if (getenv("RECON_DEBUG")) {
- ret = glfs_set_logging(fs, "/dev/stderr", 7);
- } else {
- ret = glfs_set_logging(fs, "/dev/null", 0);
- }
-
- if (ret != GFAPI_SUCCESS) {
- fprintf(stderr, "glfs_set_logging failed (%d)\n", errno);
- return EXIT_FAILURE;
- }
-
- ret = glfs_set_volfile(fs, argv[1]);
- if (ret != GFAPI_SUCCESS) {
- fprintf(stderr, "glfs_set_volfile failed (%d)\n", errno);
- return EXIT_FAILURE;
- }
-
- ret = glfs_init(fs);
- if (ret != GFAPI_SUCCESS) {
- fprintf(stderr, "glfs_init failed (%d)\n", errno);
- return EXIT_FAILURE;
- }
-
- meta_fd = open(argv[2], O_RDONLY);
- if (meta_fd < 0) {
- perror("open");
- return EXIT_FAILURE;
- }
-
- /* TBD: get proper length */
- meta_buf = mmap(NULL, 1048576, PROT_READ, MAP_PRIVATE, meta_fd, 0);
- if (meta_buf == MAP_FAILED) {
- perror("mmap");
- return EXIT_FAILURE;
- }
-
- data_fd = open(argv[3], O_RDONLY);
- if (data_fd < 0) {
- perror("open");
- return EXIT_FAILURE;
- }
-
- /* TBD: get proper length */
- data_buf = mmap(NULL, 1048576, PROT_READ, MAP_PRIVATE, data_fd, 0);
- if (data_buf == MAP_FAILED) {
- perror("mmap");
- return EXIT_FAILURE;
- }
-
- for (;;) {
- if (!recon_execute(fs, &meta_buf, &data_buf)) {
- break;
- }
- }
-
- return EXIT_SUCCESS;
-}
diff --git a/xlators/experimental/jbr-client/Makefile.am b/xlators/experimental/jbr-client/Makefile.am
deleted file mode 100644
index a985f42a877..00000000000
--- a/xlators/experimental/jbr-client/Makefile.am
+++ /dev/null
@@ -1,3 +0,0 @@
-SUBDIRS = src
-
-CLEANFILES =
diff --git a/xlators/experimental/jbr-client/src/Makefile.am b/xlators/experimental/jbr-client/src/Makefile.am
deleted file mode 100644
index c71f5ff1e83..00000000000
--- a/xlators/experimental/jbr-client/src/Makefile.am
+++ /dev/null
@@ -1,34 +0,0 @@
-if WITH_SERVER
-xlator_LTLIBRARIES = jbrc.la
-endif
-xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/experimental
-
-nodist_jbrc_la_SOURCES = jbrc-cg.c
-CLEANFILES = $(nodist_jbrc_la_SOURCES)
-
-jbrc_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS)
-jbrc_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
-
-noinst_HEADERS = $(top_srcdir)/xlators/lib/src/libxlator.h \
- $(top_srcdir)/glusterfsd/src/glusterfsd.h \
- jbrc.h jbr-messages.h
-
-AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \
- -I$(top_srcdir)/xlators/lib/src \
- -I$(top_srcdir)/rpc/xdr/src -I$(top_builddir)/rpc/xdr/src \
- -I$(top_srcdir)/rpc/rpc-lib/src
-
-AM_CFLAGS = -Wall $(GF_CFLAGS)
-
-JBRC_PREFIX = $(top_srcdir)/xlators/experimental/jbr-client/src
-JBRC_GEN_FOPS = $(JBRC_PREFIX)/gen-fops.py
-JBRC_TEMPLATES = $(JBRC_PREFIX)/fop-template.c.in
-JBRC_WRAPPER = $(JBRC_PREFIX)/jbrc.c
-noinst_PYTHON = $(JBRC_GEN_FOPS)
-EXTRA_DIST = $(JBRC_TEMPLATES) $(JBRC_WRAPPER)
-
-jbrc-cg.c: $(JBRC_GEN_FOPS) $(JBRC_TEMPLATES) $(JBRC_WRAPPER)
- $(PYTHON) $(JBRC_GEN_FOPS) $(JBRC_TEMPLATES) $(JBRC_WRAPPER) > $@
-
-uninstall-local:
- rm -f $(DESTDIR)$(xlatordir)/jbr.so
diff --git a/xlators/experimental/jbr-client/src/fop-template.c.in b/xlators/experimental/jbr-client/src/fop-template.c.in
deleted file mode 100644
index 9732badc794..00000000000
--- a/xlators/experimental/jbr-client/src/fop-template.c.in
+++ /dev/null
@@ -1,102 +0,0 @@
-/* template-name fop */
-int32_t jbrc_@NAME@(call_frame_t *frame, xlator_t *this, @LONG_ARGS@)
-{
- jbrc_local_t *local = NULL;
- xlator_t *target_xl = ACTIVE_CHILD(this);
-
- local = mem_get(this->local_pool);
- if (!local) {
- goto err;
- }
-
- local->stub = fop_@NAME@_stub(frame, jbrc_@NAME@_continue, @SHORT_ARGS@);
- if (!local->stub) {
- goto err;
- }
- local->curr_xl = target_xl;
- local->scars = 0;
-
- frame->local = local;
- STACK_WIND_COOKIE(frame, jbrc_@NAME@_cbk, target_xl, target_xl,
- target_xl->fops->@NAME@, @SHORT_ARGS@);
- return 0;
-
-err:
- if (local) {
- mem_put(local);
- }
- STACK_UNWIND_STRICT(@NAME@, frame, -1, ENOMEM, @ERROR_ARGS@);
- return 0;
-}
-
-/* template-name cbk */
-int32_t jbrc_@NAME@_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, @LONG_ARGS@)
-{
- jbrc_local_t *local = frame->local;
- xlator_t *last_xl = cookie;
- xlator_t *next_xl;
- jbrc_private_t *priv = this->private;
- struct timespec spec;
-
- if (op_ret != (-1)) {
- if (local->scars) {
- gf_msg(this->name, GF_LOG_INFO, 0, J_MSG_RETRY_MSG,
- HILITE("retried %p OK"), frame->local);
- }
- priv->active = last_xl;
- goto unwind;
- }
- if ((op_errno != EREMOTE) && (op_errno != ENOTCONN)) {
- goto unwind;
- }
-
- /* TBD: get leader ID from xdata? */
- next_xl = next_xlator(this, last_xl);
- /*
- * We can't just give up after we've tried all bricks, because it's
- * quite likely that a new leader election just hasn't finished yet.
- * We also shouldn't retry endlessly, and especially not at a high
- * rate, but that's good enough while we work on other things.
- *
- * TBD: implement slow/finite retry via a worker thread
- */
- if (!next_xl || (local->scars >= SCAR_LIMIT)) {
- gf_msg(this->name, GF_LOG_DEBUG, 0, J_MSG_RETRY_MSG,
- HILITE("ran out of retries for %p"), frame->local);
- goto unwind;
- }
-
- local->curr_xl = next_xl;
- local->scars += 1;
- spec.tv_sec = 1;
- spec.tv_nsec = 0;
- /*
- * WARNING
- *
- * Just calling gf_timer_call_after like this leaves open the
- * possibility that writes will get reordered, if a first write is
- * rescheduled and then a second comes along to find an updated
- * priv->active before the first actually executes. We might need to
- * implement a stricter (and more complicated) queuing mechanism to
- * ensure absolute consistency in this case.
- */
- if (gf_timer_call_after(this->ctx, spec, jbrc_retry_cb, local)) {
- return 0;
- }
-
-unwind:
- call_stub_destroy(local->stub);
- STACK_UNWIND_STRICT(@NAME@, frame, op_ret, op_errno, @SHORT_ARGS@);
- return 0;
-}
-
-/* template-name cont-func */
-int32_t jbrc_@NAME@_continue(call_frame_t *frame, xlator_t *this, @LONG_ARGS@)
-{
- jbrc_local_t *local = frame->local;
-
- STACK_WIND_COOKIE(frame, jbrc_@NAME@_cbk, local->curr_xl, local->curr_xl,
- local->curr_xl->fops->@NAME@, @SHORT_ARGS@);
- return 0;
-}
diff --git a/xlators/experimental/jbr-client/src/gen-fops.py b/xlators/experimental/jbr-client/src/gen-fops.py
deleted file mode 100755
index a5ddf577fbe..00000000000
--- a/xlators/experimental/jbr-client/src/gen-fops.py
+++ /dev/null
@@ -1,58 +0,0 @@
-#!/usr/bin/python3
-
-from __future__ import print_function
-import os
-import re
-import string
-import sys
-
-curdir = os.path.dirname(sys.argv[0])
-gendir = os.path.join(curdir, '../../../../libglusterfs/src')
-sys.path.append(gendir)
-from generator import ops, fop_subs, cbk_subs, generate
-
-# We really want the callback argument list, even when we're generating fop
-# code, so we propagate here.
-# TBD: this should probably be right in generate.py
-for k, v in cbk_subs.items():
- fop_subs[k]['@ERROR_ARGS@'] = v['@ERROR_ARGS@']
-
-# Stolen from old codegen.py
-def load_templates (path):
- templates = {}
- tmpl_re = re.compile("/\* template-name (.*) \*/")
- templates = {}
- t_name = None
- for line in open(path, "r").readlines():
- if not line:
- break
- m = tmpl_re.match(line)
- if m:
- if t_name:
- templates[t_name] = ''.join(t_contents)
- t_name = m.group(1).strip()
- t_contents = []
- elif t_name:
- t_contents.append(line)
- if t_name:
- templates[t_name] = ''.join(t_contents)
- return templates
-
-# Stolen from gen_fdl.py
-def gen_client (templates):
- for name, value in ops.items():
- if name == 'getspec':
- # It's not real if it doesn't have a stub function.
- continue
- print(generate(templates['cbk'], name, cbk_subs))
- print(generate(templates['cont-func'], name, fop_subs))
- print(generate(templates['fop'], name, fop_subs))
-
-tmpl = load_templates(sys.argv[1])
-for l in open(sys.argv[2], 'r').readlines():
- if l.find('#pragma generate') != -1:
- print("/* BEGIN GENERATED CODE - DO NOT MODIFY */")
- gen_client(tmpl)
- print("/* END GENERATED CODE */")
- else:
- print(l[:-1])
diff --git a/xlators/experimental/jbr-client/src/jbr-messages.h b/xlators/experimental/jbr-client/src/jbr-messages.h
deleted file mode 100644
index ecbf569ec13..00000000000
--- a/xlators/experimental/jbr-client/src/jbr-messages.h
+++ /dev/null
@@ -1,30 +0,0 @@
-/*
- Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-#ifndef _JBR_MESSAGES_H_
-#define _JBR_MESSAGES_H_
-
-#include "glfs-message-id.h"
-
-/* To add new message IDs, append new identifiers at the end of the list.
- *
- * Never remove a message ID. If it's not used anymore, you can rename it or
- * leave it as it is, but not delete it. This is to prevent reutilization of
- * IDs by other messages.
- *
- * The component name must match one of the entries defined in
- * glfs-message-id.h.
- */
-
-GLFS_MSGID(JBR, J_MSG_INIT_FAIL, J_MSG_RETRY_MSG, J_MSG_MEM_ERR, J_MSG_DICT_FLR,
- J_MSG_GENERIC, J_MSG_INVALID, J_MSG_NO_DATA, J_MSG_SYS_CALL_FAILURE,
- J_MSG_QUORUM_NOT_MET, J_MSG_LOCK_FAILURE);
-
-#endif /* _JBR_MESSAGES_H_ */
diff --git a/xlators/experimental/jbr-client/src/jbrc.c b/xlators/experimental/jbr-client/src/jbrc.c
deleted file mode 100644
index 28801ecc99f..00000000000
--- a/xlators/experimental/jbr-client/src/jbrc.c
+++ /dev/null
@@ -1,311 +0,0 @@
-/*
- Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include "call-stub.h"
-#include "defaults.h"
-#include "timer.h"
-#include "xlator.h"
-#include "jbr-messages.h"
-#include "jbrc.h"
-#include "statedump.h"
-
-#define SCAR_LIMIT 20
-#define HILITE(x) ("" x "")
-
-/*
- * The fops are actually generated by gen-fops.py; the rest was mostly copied
- * from defaults.c (commit cd253754 on 27 August 2013).
- */
-
-enum gf_dht_mem_types_ {
- gf_mt_jbrc_private_t = gf_common_mt_end + 1,
- gf_mt_jbrc_end
-};
-
-char *JBRC_XATTR = "user.jbr.active";
-
-static inline xlator_t *
-ACTIVE_CHILD(xlator_t *parent)
-{
- jbrc_private_t *priv = parent->private;
-
- return priv ? priv->active : FIRST_CHILD(parent);
-}
-
-xlator_t *
-next_xlator(xlator_t *this, xlator_t *prev)
-{
- xlator_list_t *trav;
-
- for (trav = this->children; trav; trav = trav->next) {
- if (trav->xlator == prev) {
- return trav->next ? trav->next->xlator : this->children->xlator;
- }
- }
-
- return NULL;
-}
-
-void
-jbrc_retry_cb(void *cb_arg)
-{
- jbrc_local_t *local = cb_arg;
-
- gf_msg(__func__, GF_LOG_INFO, 0, J_MSG_RETRY_MSG, HILITE("retrying %p"),
- local);
- call_resume_wind(local->stub);
-}
-
-#pragma generate
-
-int32_t
-jbrc_forget(xlator_t *this, inode_t *inode)
-{
- gf_msg_callingfn(this->name, GF_LOG_WARNING, 0, J_MSG_INIT_FAIL,
- "xlator does not implement forget_cbk");
- return 0;
-}
-
-int32_t
-jbrc_releasedir(xlator_t *this, fd_t *fd)
-{
- gf_msg_callingfn(this->name, GF_LOG_WARNING, 0, J_MSG_INIT_FAIL,
- "xlator does not implement releasedir_cbk");
- return 0;
-}
-
-int32_t
-jbrc_release(xlator_t *this, fd_t *fd)
-{
- gf_msg_callingfn(this->name, GF_LOG_WARNING, 0, J_MSG_INIT_FAIL,
- "xlator does not implement release_cbk");
- return 0;
-}
-
-struct xlator_fops fops = {
- .lookup = jbrc_lookup,
- .stat = jbrc_stat,
- .fstat = jbrc_fstat,
- .truncate = jbrc_truncate,
- .ftruncate = jbrc_ftruncate,
- .access = jbrc_access,
- .readlink = jbrc_readlink,
- .mknod = jbrc_mknod,
- .mkdir = jbrc_mkdir,
- .unlink = jbrc_unlink,
- .rmdir = jbrc_rmdir,
- .symlink = jbrc_symlink,
- .rename = jbrc_rename,
- .link = jbrc_link,
- .create = jbrc_create,
- .open = jbrc_open,
- .readv = jbrc_readv,
- .writev = jbrc_writev,
- .flush = jbrc_flush,
- .fsync = jbrc_fsync,
- .opendir = jbrc_opendir,
- .readdir = jbrc_readdir,
- .readdirp = jbrc_readdirp,
- .fsyncdir = jbrc_fsyncdir,
- .statfs = jbrc_statfs,
- .setxattr = jbrc_setxattr,
- .getxattr = jbrc_getxattr,
- .fsetxattr = jbrc_fsetxattr,
- .fgetxattr = jbrc_fgetxattr,
- .removexattr = jbrc_removexattr,
- .fremovexattr = jbrc_fremovexattr,
- .lk = jbrc_lk,
- .inodelk = jbrc_inodelk,
- .finodelk = jbrc_finodelk,
- .entrylk = jbrc_entrylk,
- .fentrylk = jbrc_fentrylk,
- .rchecksum = jbrc_rchecksum,
- .xattrop = jbrc_xattrop,
- .fxattrop = jbrc_fxattrop,
- .setattr = jbrc_setattr,
- .fsetattr = jbrc_fsetattr,
- .fallocate = jbrc_fallocate,
- .discard = jbrc_discard,
-};
-
-struct xlator_cbks cbks = {};
-
-int32_t
-mem_acct_init(xlator_t *this)
-{
- int ret = -1;
-
- GF_VALIDATE_OR_GOTO("jbrc", this, out);
-
- ret = xlator_mem_acct_init(this, gf_mt_jbrc_end + 1);
-
- if (ret != 0) {
- gf_msg(this->name, GF_LOG_ERROR, ENOMEM, J_MSG_MEM_ERR,
- "Memory accounting init failed");
- return ret;
- }
-out:
- return ret;
-}
-
-int32_t
-jbrc_init(xlator_t *this)
-{
- jbrc_private_t *priv = NULL;
- xlator_list_t *trav = NULL;
-
- this->local_pool = mem_pool_new(jbrc_local_t, 128);
- if (!this->local_pool) {
- gf_msg(this->name, GF_LOG_ERROR, ENOMEM, J_MSG_MEM_ERR,
- "failed to create jbrc_local_t pool");
- goto err;
- }
-
- priv = GF_CALLOC(1, sizeof(*priv), gf_mt_jbrc_private_t);
- if (!priv) {
- goto err;
- }
-
- for (trav = this->children; trav; trav = trav->next) {
- ++(priv->n_children);
- }
-
- priv->active = FIRST_CHILD(this);
- this->private = priv;
- return 0;
-
-err:
- if (priv) {
- GF_FREE(priv);
- }
- return -1;
-}
-
-void
-jbrc_fini(xlator_t *this)
-{
- GF_FREE(this->private);
-}
-
-int
-jbrc_get_child_index(xlator_t *this, xlator_t *kid)
-{
- xlator_list_t *trav;
- int retval = -1;
-
- for (trav = this->children; trav; trav = trav->next) {
- ++retval;
- if (trav->xlator == kid) {
- return retval;
- }
- }
-
- return -1;
-}
-
-uint8_t
-jbrc_count_up_kids(jbrc_private_t *priv)
-{
- uint8_t retval = 0;
- uint8_t i;
-
- for (i = 0; i < priv->n_children; ++i) {
- if (priv->kid_state & (1 << i)) {
- ++retval;
- }
- }
-
- return retval;
-}
-
-int32_t
-jbrc_notify(xlator_t *this, int32_t event, void *data, ...)
-{
- int32_t ret = 0;
- int32_t index = 0;
- jbrc_private_t *priv = NULL;
-
- GF_VALIDATE_OR_GOTO(THIS->name, this, out);
- priv = this->private;
- GF_VALIDATE_OR_GOTO(this->name, priv, out);
-
- switch (event) {
- case GF_EVENT_CHILD_UP:
- index = jbrc_get_child_index(this, data);
- if (index >= 0) {
- priv->kid_state |= (1 << index);
- priv->up_children = jbrc_count_up_kids(priv);
- gf_msg(this->name, GF_LOG_INFO, 0, J_MSG_GENERIC,
- "got CHILD_UP for %s, now %u kids",
- ((xlator_t *)data)->name, priv->up_children);
- }
- ret = default_notify(this, event, data);
- break;
- case GF_EVENT_CHILD_DOWN:
- index = jbrc_get_child_index(this, data);
- if (index >= 0) {
- priv->kid_state &= ~(1 << index);
- priv->up_children = jbrc_count_up_kids(priv);
- gf_msg(this->name, GF_LOG_INFO, 0, J_MSG_GENERIC,
- "got CHILD_DOWN for %s, now %u kids",
- ((xlator_t *)data)->name, priv->up_children);
- }
- break;
- default:
- ret = default_notify(this, event, data);
- }
-
-out:
- return ret;
-}
-
-int
-jbrc_priv_dump(xlator_t *this)
-{
- jbrc_private_t *priv = NULL;
- char key_prefix[GF_DUMP_MAX_BUF_LEN];
- xlator_list_t *trav = NULL;
- int32_t i = -1;
-
- GF_VALIDATE_OR_GOTO(THIS->name, this, out);
- priv = this->private;
- GF_VALIDATE_OR_GOTO(this->name, priv, out);
-
- snprintf(key_prefix, GF_DUMP_MAX_BUF_LEN, "%s.%s", this->type, this->name);
- gf_proc_dump_add_section("%s", key_prefix);
-
- gf_proc_dump_write("up_children", "%u", priv->up_children);
-
- for (trav = this->children, i = 0; trav; trav = trav->next, i++) {
- snprintf(key_prefix, GF_DUMP_MAX_BUF_LEN, "child_%d", i);
- gf_proc_dump_write(key_prefix, "%s", trav->xlator->name);
- }
-
-out:
- return 0;
-}
-
-struct xlator_dumpops dumpops = {
- .priv = jbrc_priv_dump,
-};
-
-class_methods_t class_methods = {
- .init = jbrc_init,
- .fini = jbrc_fini,
- .notify = jbrc_notify,
-};
-
-struct volume_options options[] = {
- {.key = {NULL}},
-};
diff --git a/xlators/experimental/jbr-client/src/jbrc.h b/xlators/experimental/jbr-client/src/jbrc.h
deleted file mode 100644
index f99178402b3..00000000000
--- a/xlators/experimental/jbr-client/src/jbrc.h
+++ /dev/null
@@ -1,27 +0,0 @@
-/*
- Copyright (c) 2016 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-#ifndef _JBRC_H_
-#define _JBRC_H_
-
-typedef struct {
- xlator_t *active;
- uint8_t up_children;
- uint8_t n_children;
- uint32_t kid_state;
-} jbrc_private_t;
-
-typedef struct {
- call_stub_t *stub;
- xlator_t *curr_xl;
- uint16_t scars;
-} jbrc_local_t;
-
-#endif /* _JBRC_H_ */
diff --git a/xlators/experimental/jbr-server/Makefile.am b/xlators/experimental/jbr-server/Makefile.am
deleted file mode 100644
index a985f42a877..00000000000
--- a/xlators/experimental/jbr-server/Makefile.am
+++ /dev/null
@@ -1,3 +0,0 @@
-SUBDIRS = src
-
-CLEANFILES =
diff --git a/xlators/experimental/jbr-server/src/Makefile.am b/xlators/experimental/jbr-server/src/Makefile.am
deleted file mode 100644
index 42d3c8a6c36..00000000000
--- a/xlators/experimental/jbr-server/src/Makefile.am
+++ /dev/null
@@ -1,39 +0,0 @@
-if WITH_SERVER
-xlator_LTLIBRARIES = jbr.la
-endif
-xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/experimental
-
-nodist_jbr_la_SOURCES = jbr-cg.c
-CLEANFILES = $(nodist_jbr_la_SOURCES)
-
-jbr_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS)
-jbr_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la \
- $(top_builddir)/api/src/libgfapi.la
-
-noinst_HEADERS = jbr-internal.h \
- $(top_srcdir)/xlators/lib/src/libxlator.h \
- $(top_srcdir)/xlators/experimental/fdl/src/fdl.h \
- $(top_srcdir)/glusterfsd/src/glusterfsd.h
-
-AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \
- -I$(top_srcdir)/rpc/xdr/src -I$(top_builddir)/rpc/xdr/src \
- -I$(top_srcdir)/xlators/lib/src -I$(top_srcdir)/rpc/rpc-lib/src \
- -I$(top_srcdir)/xlators/experimental/fdl/src/ \
- -DSBIN_DIR=\"$(sbindir)\" -I$(top_srcdir)/api/src \
- -DJBR_SCRIPT_PREFIX=\"$(jbrdir)\" \
- -I$(top_srcdir)/xlators/experimental/jbr-client/src/
-
-AM_CFLAGS = -Wall $(GF_CFLAGS)
-
-JBR_PREFIX = $(top_srcdir)/xlators/experimental/jbr-server/src
-JBR_GEN_FOPS = $(JBR_PREFIX)/gen-fops.py
-JBR_TEMPLATES = $(JBR_PREFIX)/all-templates.c.in
-JBR_WRAPPER = $(JBR_PREFIX)/jbr.c
-noinst_PYTHON = $(JBR_GEN_FOPS)
-EXTRA_DIST = $(JBR_TEMPLATES) $(JBR_WRAPPER)
-
-jbr-cg.c: $(JBR_GEN_FOPS) $(JBR_TEMPLATES) $(JBR_WRAPPER)
- $(PYTHON) $(JBR_GEN_FOPS) $(JBR_TEMPLATES) $(JBR_WRAPPER) > $@
-
-uninstall-local:
- rm -f $(DESTDIR)$(xlatordir)/jbr.so
diff --git a/xlators/experimental/jbr-server/src/all-templates.c.in b/xlators/experimental/jbr-server/src/all-templates.c.in
deleted file mode 100644
index a9d57fc646f..00000000000
--- a/xlators/experimental/jbr-server/src/all-templates.c.in
+++ /dev/null
@@ -1,501 +0,0 @@
-/*
- * You can put anything here - it doesn't even have to be a comment - and it
- * will be ignored until we reach the first template-name comment.
- */
-
-/* template-name read-fop */
-int32_t jbr_@NAME@(call_frame_t *frame, xlator_t *this, @LONG_ARGS@)
-{
- jbr_private_t *priv = NULL;
- gf_boolean_t in_recon = _gf_false;
- int32_t op_errno = 0;
- int32_t recon_term, recon_index;
-
- GF_VALIDATE_OR_GOTO("jbr", this, err);
- priv = this->private;
- GF_VALIDATE_OR_GOTO(this->name, priv, err);
- GF_VALIDATE_OR_GOTO(this->name, frame, err);
-
- op_errno = EREMOTE;
-
- /* allow reads during reconciliation *
- * TBD: allow "dirty" reads on non-leaders *
- */
- if (xdata && (dict_get_int32(xdata, RECON_TERM_XATTR, &recon_term) == 0) &&
- (dict_get_int32(xdata, RECON_INDEX_XATTR, &recon_index) == 0)) {
- in_recon = _gf_true;
- }
-
- if ((!priv->leader) && (in_recon == _gf_false)) {
- goto err;
- }
-
- STACK_WIND(frame, default_@NAME@_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->@NAME@, @SHORT_ARGS@);
- return 0;
-
-err:
- STACK_UNWIND_STRICT(@NAME@, frame, -1, op_errno, @ERROR_ARGS@);
- return 0;
-}
-
-/* template-name read-perform_local_op */
-/* No "perform_local_op" function needed for @NAME@ */
-
-/* template-name read-dispatch */
-/* No "dispatch" function needed for @NAME@ */
-
-/* template-name read-call_dispatch */
-/* No "call_dispatch" function needed for @NAME@ */
-
-/* template-name read-fan-in */
-/* No "fan-in" function needed for @NAME@ */
-
-/* template-name read-continue */
-/* No "continue" function needed for @NAME@ */
-
-/* template-name read-complete */
-/* No "complete" function needed for @NAME@ */
-
-/* template-name write-fop */
-int32_t jbr_@NAME@(call_frame_t *frame, xlator_t *this, @LONG_ARGS@)
-{
- jbr_local_t *local = NULL;
- jbr_private_t *priv = NULL;
- int32_t ret = -1;
- int op_errno = ENOMEM;
-
- GF_VALIDATE_OR_GOTO("jbr", this, err);
- priv = this->private;
- GF_VALIDATE_OR_GOTO(this->name, priv, err);
- GF_VALIDATE_OR_GOTO(this->name, frame, err);
-
-#if defined(JBR_CG_NEED_FD)
- ret = jbr_leader_checks_and_init(frame, this, &op_errno, xdata, fd);
-#else
- ret = jbr_leader_checks_and_init(frame, this, &op_errno, xdata, NULL);
-#endif
- if (ret)
- goto err;
-
- local = frame->local;
-
- /*
- * If we let it through despite not being the leader, then we just want
- * to pass it on down without all of the additional xattrs, queuing, and
- * so on. However, jbr_*_complete does depend on the initialization
- * immediately above this.
- */
- if (!priv->leader) {
- STACK_WIND(frame, jbr_@NAME@_complete, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->@NAME@, @SHORT_ARGS@);
- return 0;
- }
-
- ret = jbr_initialize_xdata_set_attrs(this, &xdata);
- if (ret)
- goto err;
-
- local->xdata = dict_ref(xdata);
- local->stub = fop_@NAME@_stub(frame, jbr_@NAME@_continue, @SHORT_ARGS@);
- if (!local->stub) {
- goto err;
- }
-
- /*
- * Can be used to just call_dispatch or be customised per fop to *
- * perform ops specific to that particular fop. *
- */
- ret = jbr_@NAME@_perform_local_op(frame, this, &op_errno, @SHORT_ARGS@);
- if (ret)
- goto err;
-
- return ret;
-err:
- if (local) {
- if (local->stub) {
- call_stub_destroy(local->stub);
- }
- if (local->qstub) {
- call_stub_destroy(local->qstub);
- }
- if (local->fd) {
- fd_unref(local->fd);
- }
- mem_put(local);
- }
- STACK_UNWIND_STRICT(@NAME@, frame, -1, op_errno, @ERROR_ARGS@);
- return 0;
-}
-
-/* template-name write-perform_local_op */
-int32_t jbr_@NAME@_perform_local_op(call_frame_t *frame, xlator_t *this,
- int *op_errno, @LONG_ARGS@)
-{
- int32_t ret = -1;
-
- GF_VALIDATE_OR_GOTO("jbr", this, out);
- GF_VALIDATE_OR_GOTO(this->name, frame, out);
- GF_VALIDATE_OR_GOTO(this->name, op_errno, out);
-
- ret = jbr_@NAME@_call_dispatch(frame, this, op_errno, @SHORT_ARGS@);
-
-out:
- return ret;
-}
-
-/* template-name write-call_dispatch */
-int32_t jbr_@NAME@_call_dispatch(call_frame_t *frame, xlator_t *this,
- int *op_errno, @LONG_ARGS@)
-{
- jbr_local_t *local = NULL;
- jbr_private_t *priv = NULL;
- int32_t ret = -1;
-
- GF_VALIDATE_OR_GOTO("jbr", this, out);
- priv = this->private;
- GF_VALIDATE_OR_GOTO(this->name, priv, out);
- GF_VALIDATE_OR_GOTO(this->name, frame, out);
- local = frame->local;
- GF_VALIDATE_OR_GOTO(this->name, local, out);
- GF_VALIDATE_OR_GOTO(this->name, op_errno, out);
-
-#if defined(JBR_CG_QUEUE)
- jbr_inode_ctx_t *ictx = jbr_get_inode_ctx(this, fd->inode);
- if (!ictx) {
- *op_errno = EIO;
- goto out;
- }
-
- LOCK(&ictx->lock);
- if (ictx->active) {
- gf_msg_debug(this->name, 0, "queuing request due to conflict");
- /*
- * TBD: enqueue only for real conflict
- *
- * Currently we just act like all writes are in
- * conflict with one another. What we should really do
- * is check the active/pending queues and defer only if
- * there's a conflict there.
- *
- * It's important to check the pending queue because we
- * might have an active request X which conflicts with
- * a pending request Y, and this request Z might
- * conflict with Y but not X. If we checked only the
- * active queue then Z could jump ahead of Y, which
- * would be incorrect.
- */
- local->qstub = fop_@NAME@_stub(frame, jbr_@NAME@_dispatch,
- @SHORT_ARGS@);
- if (!local->qstub) {
- UNLOCK(&ictx->lock);
- goto out;
- }
- list_add_tail(&local->qlinks, &ictx->pqueue);
- ++(ictx->pending);
- UNLOCK(&ictx->lock);
- ret = 0;
- goto out;
- } else {
- list_add_tail(&local->qlinks, &ictx->aqueue);
- ++(ictx->active);
- }
- UNLOCK(&ictx->lock);
-#endif
- ret = jbr_@NAME@_dispatch(frame, this, @SHORT_ARGS@);
-
-out:
- return ret;
-}
-
-/* template-name write-dispatch */
-int32_t jbr_@NAME@_dispatch(call_frame_t *frame, xlator_t *this, @LONG_ARGS@)
-{
- jbr_local_t *local = NULL;
- jbr_private_t *priv = NULL;
- int32_t ret = -1;
- xlator_list_t *trav;
-
- GF_VALIDATE_OR_GOTO("jbr", this, out);
- priv = this->private;
- GF_VALIDATE_OR_GOTO(this->name, priv, out);
- GF_VALIDATE_OR_GOTO(this->name, frame, out);
- local = frame->local;
- GF_VALIDATE_OR_GOTO(this->name, local, out);
-
- /*
- * TBD: unblock pending request(s) if we fail after this point but
- * before we get to jbr_@NAME@_complete (where that code currently
- * resides).
- */
-
- local->call_count = priv->n_children - 1;
- for (trav = this->children->next; trav; trav = trav->next) {
- STACK_WIND(frame, jbr_@NAME@_fan_in, trav->xlator,
- trav->xlator->fops->@NAME@, @SHORT_ARGS@);
- }
-
- /* TBD: variable Issue count */
- ret = 0;
-out:
- return ret;
-}
-
-/* template-name write-fan-in */
-int32_t jbr_@NAME@_fan_in(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, @LONG_ARGS@)
-{
- jbr_local_t *local = NULL;
- int32_t ret = -1;
- uint8_t call_count;
-
- GF_VALIDATE_OR_GOTO("jbr", this, out);
- GF_VALIDATE_OR_GOTO(this->name, frame, out);
- local = frame->local;
- GF_VALIDATE_OR_GOTO(this->name, local, out);
-
- gf_msg_trace(this->name, 0, "op_ret = %d, op_errno = %d\n", op_ret,
- op_errno);
-
- LOCK(&frame->lock);
- call_count = --(local->call_count);
- if (op_ret != -1) {
- /* Increment the number of successful acks *
- * received for the operation. *
- */
- (local->successful_acks)++;
- local->successful_op_ret = op_ret;
- }
- gf_msg_debug(this->name, 0, "succ_acks = %d, op_ret = %d, op_errno = %d\n",
- op_ret, op_errno, local->successful_acks);
- UNLOCK(&frame->lock);
-
- /* TBD: variable Completion count */
- if (call_count == 0) {
- call_resume(local->stub);
- }
-
- ret = 0;
-out:
- return ret;
-}
-
-/* template-name write-continue */
-int32_t jbr_@NAME@_continue(call_frame_t *frame, xlator_t *this, @LONG_ARGS@)
-{
- int32_t ret = -1;
- gf_boolean_t result = _gf_false;
- jbr_local_t *local = NULL;
- jbr_local_t *new_local = NULL;
- jbr_private_t *priv = NULL;
- int32_t op_errno = 0;
-
- GF_VALIDATE_OR_GOTO("jbr", this, out);
- GF_VALIDATE_OR_GOTO(this->name, frame, out);
- priv = this->private;
- local = frame->local;
- GF_VALIDATE_OR_GOTO(this->name, priv, out);
- GF_VALIDATE_OR_GOTO(this->name, local, out);
-
- /* Perform quorum check to see if the leader needs *
- * to perform the operation. If the operation will not *
- * meet quorum irrespective of the leader's result *
- * there is no point in the leader performing the fop *
- */
- result = fop_quorum_check(this, (double)priv->n_children,
- (double)local->successful_acks + 1);
- if (result == _gf_false) {
- gf_msg(this->name, GF_LOG_ERROR, EROFS, J_MSG_QUORUM_NOT_MET,
- "Didn't receive enough acks "
- "to meet quorum. Failing the operation without trying "
- "it on the leader.");
-
-#if defined(JBR_CG_QUEUE)
- /*
- * In case of a fop failure, before unwinding need to *
- * remove it from queue *
- */
- ret = jbr_remove_from_queue(frame, this);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, J_MSG_GENERIC,
- "Failed to remove from queue.");
- }
-#endif
-
- /*
- * In this case, the quorum is not met on the followers *
- * So the operation will not be performed on the leader *
- * and a rollback will be sent via GF_FOP_IPC to all the *
- * followers, where this particular fop's term and index *
- * numbers will be journaled, and later used to rollback *
- */
- call_frame_t *new_frame;
-
- new_frame = copy_frame(frame);
-
- if (new_frame) {
- new_local = mem_get0(this->local_pool);
- if (new_local) {
- INIT_LIST_HEAD(&new_local->qlinks);
- ret = dict_set_int32(local->xdata, "rollback-fop",
- GF_FOP_@UPNAME@);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, J_MSG_DICT_FLR,
- "failed to set rollback-fop");
- } else {
- new_local->xdata = dict_ref(local->xdata);
- new_frame->local = new_local;
- jbr_ipc_call_dispatch(new_frame, this, &op_errno,
- FDL_IPC_JBR_SERVER_ROLLBACK,
- new_local->xdata);
- }
- } else {
- gf_log(this->name, GF_LOG_WARNING,
- "Could not create local for new_frame");
- }
- } else {
- gf_log(this->name, GF_LOG_WARNING, "Could not send rollback ipc");
- }
-
- STACK_UNWIND_STRICT(@NAME@, frame, -1, EROFS, @ERROR_ARGS@);
- } else {
- STACK_WIND(frame, jbr_@NAME@_complete, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->@NAME@, @SHORT_ARGS@);
- }
-
-out:
- return 0;
-}
-
-/* template-name write-complete */
-int32_t jbr_@NAME@_complete(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, @LONG_ARGS@)
-{
- int32_t ret = -1;
- gf_boolean_t result = _gf_false;
- jbr_private_t *priv = NULL;
- jbr_local_t *local = NULL;
- jbr_local_t *new_local = NULL;
-
- GF_VALIDATE_OR_GOTO("jbr", this, err);
- GF_VALIDATE_OR_GOTO(this->name, frame, err);
- priv = this->private;
- local = frame->local;
- GF_VALIDATE_OR_GOTO(this->name, priv, err);
- GF_VALIDATE_OR_GOTO(this->name, local, err);
-
- /* If the fop failed on the leader, then reduce one successful ack
- * before calculating the fop quorum
- */
- LOCK(&frame->lock);
- if (op_ret == -1)
- (local->successful_acks)--;
- UNLOCK(&frame->lock);
-
-#if defined(JBR_CG_QUEUE)
- ret = jbr_remove_from_queue(frame, this);
- if (ret)
- goto err;
-#endif
-
-#if defined(JBR_CG_FSYNC)
- jbr_mark_fd_dirty(this, local);
-#endif
-
-#if defined(JBR_CG_NEED_FD)
- fd_unref(local->fd);
-#endif
-
- /* After the leader completes the fop, a quorum check is *
- * performed, taking into account the outcome of the fop *
- * on the leader. Irrespective of the fop being successful *
- * or failing on the leader, the result of the quorum will *
- * determine if the overall fop is successful or not. For *
- * example, a fop might have succeeded on every node except *
- * the leader, in which case as quorum is being met, the fop *
- * will be treated as a successful fop, even though it failed *
- * on the leader. On follower nodes, no quorum check should *
- * be done, and the result is returned to the leader as is. *
- */
- if (priv->leader) {
- result = fop_quorum_check(this, (double)priv->n_children,
- (double)local->successful_acks + 1);
- if (result == _gf_false) {
- op_ret = -1;
- op_errno = EROFS;
- gf_msg(this->name, GF_LOG_ERROR, EROFS, J_MSG_QUORUM_NOT_MET,
- "Quorum is not met. "
- "The operation has failed.");
- /*
- * In this case, the quorum is not met after the *
- * operation is performed on the leader. Hence a *
- * rollback will be sent via GF_FOP_IPC to the leader *
- * where this particular fop's term and index numbers *
- * will be journaled, and later used to rollback. *
- * The same will be done on all the followers *
- */
- call_frame_t *new_frame;
-
- new_frame = copy_frame(frame);
- if (new_frame) {
- new_local = mem_get0(this->local_pool);
- if (new_local) {
- INIT_LIST_HEAD(&new_local->qlinks);
- gf_msg(this->name, GF_LOG_ERROR, 0, J_MSG_DICT_FLR,
- "op = %d", new_frame->op);
- ret = dict_set_int32(local->xdata, "rollback-fop",
- GF_FOP_@UPNAME@);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, J_MSG_DICT_FLR,
- "failed to set "
- "rollback-fop");
- } else {
- new_local->xdata = dict_ref(local->xdata);
- new_frame->local = new_local;
- /*
- * Calling STACK_WIND instead *
- * of jbr_ipc as it will not *
- * unwind to the previous *
- * translators like it will *
- * in case of jbr_ipc. *
- */
- STACK_WIND(
- new_frame, jbr_ipc_complete, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->ipc,
- FDL_IPC_JBR_SERVER_ROLLBACK, new_local->xdata);
- }
- } else {
- gf_log(this->name, GF_LOG_WARNING,
- "Could not create local "
- "for new_frame");
- }
- } else {
- gf_log(this->name, GF_LOG_WARNING,
- "Could not send rollback ipc");
- }
- } else {
-#if defined(JBR_CG_NEED_FD)
- op_ret = local->successful_op_ret;
-#else
- op_ret = 0;
-#endif
- op_errno = 0;
- gf_msg_debug(this->name, 0,
- "Quorum has met. The operation has succeeded.");
- }
- }
-
- /*
- * Unrefing the reference taken in jbr_@NAME@ () *
- */
- dict_unref(local->xdata);
-
- STACK_UNWIND_STRICT(@NAME@, frame, op_ret, op_errno, @SHORT_ARGS@);
-
- return 0;
-
-err:
- STACK_UNWIND_STRICT(@NAME@, frame, -1, 0, @SHORT_ARGS@);
-
- return 0;
-}
diff --git a/xlators/experimental/jbr-server/src/gen-fops.py b/xlators/experimental/jbr-server/src/gen-fops.py
deleted file mode 100755
index 616782bba45..00000000000
--- a/xlators/experimental/jbr-server/src/gen-fops.py
+++ /dev/null
@@ -1,181 +0,0 @@
-#!/usr/bin/python3
-
-# This script generates the boilerplate versions of most fops and cbks in the
-# server. This allows the details of leadership-status checking, sequencing
-# between leader and followers (including fan-out), and basic error checking
-# to be centralized one place, with per-operation code kept to a minimum.
-
-from __future__ import print_function
-import os
-import re
-import string
-import sys
-
-curdir = os.path.dirname(sys.argv[0])
-gendir = os.path.join(curdir, '../../../../libglusterfs/src')
-sys.path.append(gendir)
-from generator import ops, fop_subs, cbk_subs, generate
-
-# We really want the callback argument list, even when we're generating fop
-# code, so we propagate here.
-# TBD: this should probably be right in generate.py
-for k, v in cbk_subs.items():
- fop_subs[k]['@ERROR_ARGS@'] = v['@ERROR_ARGS@']
-
-# Stolen from old codegen.py
-def load_templates (path):
- templates = {}
- tmpl_re = re.compile("/\* template-name (.*) \*/")
- templates = {}
- t_name = None
- for line in open(path, "r").readlines():
- if not line:
- break
- m = tmpl_re.match(line)
- if m:
- if t_name:
- templates[t_name] = ''.join(t_contents)
- t_name = m.group(1).strip()
- t_contents = []
- elif t_name:
- t_contents.append(line)
- if t_name:
- templates[t_name] = ''.join(t_contents)
- return templates
-
-# We need two types of templates. The first, for pure read operations, just
-# needs to do a simple am-i-leader check (augmented to allow dirty reads).
-# The second, for pure writes, needs to do fan-out to followers between those
-# initial checks and local execution. There are other operations that don't
-# fit neatly into either category - e.g. lock ops or fsync - so we'll just have
-# to handle those manually. The table thus includes entries only for those we
-# can categorize. The special cases, plus any new operations we've never even
-# heard of, aren't in there.
-#
-# Various keywords can be used to define/undefine preprocessor symbols used
-# in the templates, on a per-function basis. For example, if the keyword here
-# is "fsync" (lowercase word or abbreviation) that will cause JBR_CG_FSYNC
-# (prefix plus uppercase version) to be defined above all of the generated code
-# for that fop.
-
-fop_table = {
- "access": "read",
- "create": "write",
- "discard": "write",
-# "entrylk": "read",
- "fallocate": "write",
-# "fentrylk": "read",
- "fgetxattr": "read",
-# "finodelk": "read",
-# "flush": "read",
- "fremovexattr": "write",
- "fsetattr": "write",
- "fsetxattr": "write",
- "fstat": "read",
-# "fsync": "read",
-# "fsyncdir": "read",
- "ftruncate": "write",
- "fxattrop": "write",
- "getxattr": "read",
-# "inodelk": "read",
- "link": "write",
- "lk": "write,queue",
-# "lookup": "read",
- "mkdir": "write",
- "mknod": "write",
- "open": "write",
- "opendir": "read",
- "rchecksum": "read",
- "readdir": "read",
- "readdirp": "read",
- "readlink": "read",
- "readv": "read",
- "removexattr": "write",
- "rename": "write",
- "rmdir": "write",
- "setattr": "write",
- "setxattr": "write",
- "stat": "read",
- "statfs": "read",
- "symlink": "write",
- "truncate": "write",
- "unlink": "write",
- "writev": "write,fsync,queue",
- "xattrop": "write",
- "ipc": "write",
-}
-
-# Mention those fops in the selective_generate table, for which
-# only a few common functions will be generated, and mention those
-# functions. Rest of the functions can be customized
-selective_generate = {
- "lk": "fop,dispatch,call_dispatch",
- "ipc": "dispatch,call_dispatch",
-}
-
-# Stolen from gen_fdl.py
-def gen_server (templates):
- fops_done = []
- for name in fop_table.keys():
- info = fop_table[name].split(",")
- kind = info[0]
- flags = info[1:]
-
- # generate all functions for the fops in fop_table
- # except for the ones in selective_generate for which
- # generate only the functions mentioned in the
- # selective_generate table
- gen_funcs = "fop,complete,continue,fan-in,dispatch, \
- call_dispatch,perform_local_op"
- if name in selective_generate:
- gen_funcs = selective_generate[name].split(",")
-
- if ("fsync" in flags) or ("queue" in flags):
- flags.append("need_fd")
- for fname in flags:
- print("#define JBR_CG_%s" % fname.upper())
-
- if 'complete' in gen_funcs:
- print(generate(templates[kind+"-complete"],
- name, cbk_subs))
-
- if 'continue' in gen_funcs:
- print(generate(templates[kind+"-continue"],
- name, fop_subs))
-
- if 'fan-in' in gen_funcs:
- print(generate(templates[kind+"-fan-in"],
- name, cbk_subs))
-
- if 'dispatch' in gen_funcs:
- print(generate(templates[kind+"-dispatch"],
- name, fop_subs))
-
- if 'call_dispatch' in gen_funcs:
- print(generate(templates[kind+"-call_dispatch"],
- name, fop_subs))
-
- if 'perform_local_op' in gen_funcs:
- print(generate(templates[kind+"-perform_local_op"],
- name, fop_subs))
-
- if 'fop' in gen_funcs:
- print(generate(templates[kind+"-fop"], name, fop_subs))
-
- for fname in flags:
- print("#undef JBR_CG_%s" % fname.upper())
- fops_done.append(name)
- # Just for fun, emit the fops table too.
- print("struct xlator_fops fops = {")
- for x in fops_done:
- print((" .%s = jbr_%s,"%(x, x)))
- print("};")
-
-tmpl = load_templates(sys.argv[1])
-for l in open(sys.argv[2], 'r').readlines():
- if l.find('#pragma generate') != -1:
- print("/* BEGIN GENERATED CODE - DO NOT MODIFY */")
- gen_server(tmpl)
- print("/* END GENERATED CODE */")
- else:
- print(l[:-1])
diff --git a/xlators/experimental/jbr-server/src/jbr-internal.h b/xlators/experimental/jbr-server/src/jbr-internal.h
deleted file mode 100644
index f225e988a5f..00000000000
--- a/xlators/experimental/jbr-server/src/jbr-internal.h
+++ /dev/null
@@ -1,118 +0,0 @@
-/*
- Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-#include <sys/stat.h>
-#include <sys/types.h>
-
-#define LEADER_XATTR "user.jbr.leader"
-#define SECOND_CHILD(xl) (xl->children->next->xlator)
-#define RECONCILER_PATH JBR_SCRIPT_PREFIX "/reconciler.py"
-#define CHANGELOG_ENTRY_SIZE 128
-
-enum {
- gf_mt_jbr_private_t = gf_common_mt_end + 1,
- gf_mt_jbr_fd_ctx_t,
- gf_mt_jbr_inode_ctx_t,
- gf_mt_jbr_dirty_t,
- gf_mt_jbr_end
-};
-
-typedef enum jbr_recon_notify_ev_id_t {
- JBR_RECON_SET_LEADER = 1,
- JBR_RECON_ADD_CHILD = 2
-} jbr_recon_notify_ev_id_t;
-
-typedef struct _jbr_recon_notify_ev_s {
- jbr_recon_notify_ev_id_t id;
- uint32_t index; /* in case of add */
- struct list_head list;
-} jbr_recon_notify_ev_t;
-
-typedef struct {
- /*
- * This is a hack to allow a non-leader to accept requests while the
- * leader is down, and it only works for n=2. The way it works is that
- * "config_leader" indicates the state from our options (via init or
- * reconfigure) but "leader" is what the fop code actually looks at. If
- * config_leader is true, then leader will *always* be true as well,
- * giving that brick precedence. If config_leader is false, then
- * leader will only be true if there is no connection to the other
- * brick (tracked in jbr_notify).
- *
- * TBD: implement real leader election
- */
- gf_boolean_t config_leader;
- gf_boolean_t leader;
- uint8_t up_children;
- uint8_t n_children;
- char *vol_file;
- uint32_t current_term;
- uint32_t kid_state;
- gf_lock_t dirty_lock;
- struct list_head dirty_fds;
- uint32_t index;
- gf_lock_t index_lock;
- double quorum_pct;
- int term_fd;
- long term_total;
- long term_read;
- /*
- * This is a super-duper hack, but it will do for now. The reason it's
- * a hack is that we pass this to dict_set_static_bin, so we don't have
- * to mess around with allocating and freeing it on every single IPC
- * request, but it's totally not thread-safe. On the other hand, there
- * should only be one reconciliation thread running and calling these
- * functions at a time, so maybe that doesn't matter.
- *
- * TBD: re-evaluate how to manage this
- */
- char term_buf[CHANGELOG_ENTRY_SIZE];
- gf_boolean_t child_up; /* To maintain the state of *
- * the translator */
-} jbr_private_t;
-
-typedef struct {
- call_stub_t *stub;
- call_stub_t *qstub;
- uint32_t call_count;
- uint32_t successful_acks;
- uint32_t successful_op_ret;
- fd_t *fd;
- struct list_head qlinks;
- dict_t *xdata;
-} jbr_local_t;
-
-/*
- * This should match whatever changelog returns on the pre-op for us to pass
- * when we're ready for our post-op.
- */
-typedef uint32_t log_id_t;
-
-typedef struct {
- struct list_head links;
- log_id_t id;
-} jbr_dirty_list_t;
-
-typedef struct {
- fd_t *fd;
- struct list_head dirty_list;
- struct list_head fd_list;
-} jbr_fd_ctx_t;
-
-typedef struct {
- gf_lock_t lock;
- uint32_t active;
- struct list_head aqueue;
- uint32_t pending;
- struct list_head pqueue;
-} jbr_inode_ctx_t;
-
-void
-jbr_start_reconciler(xlator_t *this);
diff --git a/xlators/experimental/jbr-server/src/jbr.c b/xlators/experimental/jbr-server/src/jbr.c
deleted file mode 100644
index 31df6ad5a1e..00000000000
--- a/xlators/experimental/jbr-server/src/jbr.c
+++ /dev/null
@@ -1,1676 +0,0 @@
-/*
- Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include <fnmatch.h>
-#include "call-stub.h"
-#include "defaults.h"
-#include "xlator.h"
-#include "glfs.h"
-#include "glfs-internal.h"
-#include "run.h"
-#include "common-utils.h"
-#include "syncop.h"
-#include "syscall.h"
-#include "compat-errno.h"
-#include "fdl.h"
-
-#include "jbr-internal.h"
-#include "jbr-messages.h"
-
-#define JBR_FLUSH_INTERVAL 5
-
-enum {
- /* echo "cluster/jbr-server" | md5sum | cut -c 1-8 */
- JBR_SERVER_IPC_BASE = 0x0e2d66a5,
- JBR_SERVER_TERM_RANGE,
- JBR_SERVER_OPEN_TERM,
- JBR_SERVER_NEXT_ENTRY
-};
-
-/*
- * Need to declare jbr_lk_call_dispatch as jbr_lk_continue and *
- * jbr_lk_perform_local_op call it, before code is generated. *
- */
-int32_t
-jbr_lk_call_dispatch(call_frame_t *frame, xlator_t *this, int *op_errno,
- fd_t *fd, int32_t cmd, struct gf_flock *lock,
- dict_t *xdata);
-
-int32_t
-jbr_lk_dispatch(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd,
- struct gf_flock *lock, dict_t *xdata);
-
-int32_t
-jbr_ipc_call_dispatch(call_frame_t *frame, xlator_t *this, int *op_errno,
- int32_t op, dict_t *xdata);
-
-int32_t
-jbr_ipc_complete(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata);
-
-/* Used to check the quorum of acks received after the fop
- * confirming the status of the fop on all the brick processes
- * for this particular subvolume
- */
-gf_boolean_t
-fop_quorum_check(xlator_t *this, double n_children, double current_state)
-{
- jbr_private_t *priv = NULL;
- gf_boolean_t result = _gf_false;
- double required = 0;
- double current = 0;
-
- GF_VALIDATE_OR_GOTO("jbr", this, out);
- priv = this->private;
- GF_VALIDATE_OR_GOTO(this->name, priv, out);
-
- required = n_children * priv->quorum_pct;
-
- /*
- * Before performing the fop on the leader, we need to check,
- * if there is any merit in performing the fop on the leader.
- * In a case, where even a successful write on the leader, will
- * not meet quorum, there is no point in trying the fop on the
- * leader.
- * When this function is called after the leader has tried
- * performing the fop, this check will calculate quorum taking into
- * account the status of the fop on the leader. If the leader's
- * op_ret was -1, the complete function would account that by
- * decrementing successful_acks by 1
- */
-
- current = current_state * 100.0;
-
- if (current < required) {
- gf_msg(this->name, GF_LOG_ERROR, 0, J_MSG_QUORUM_NOT_MET,
- "Quorum not met. quorum_pct = %f "
- "Current State = %f, Required State = %f",
- priv->quorum_pct, current, required);
- } else
- result = _gf_true;
-
-out:
- return result;
-}
-
-jbr_inode_ctx_t *
-jbr_get_inode_ctx(xlator_t *this, inode_t *inode)
-{
- uint64_t ctx_int = 0LL;
- jbr_inode_ctx_t *ctx_ptr;
-
- if (__inode_ctx_get(inode, this, &ctx_int) == 0) {
- ctx_ptr = (jbr_inode_ctx_t *)(long)ctx_int;
- } else {
- ctx_ptr = GF_CALLOC(1, sizeof(*ctx_ptr), gf_mt_jbr_inode_ctx_t);
- if (ctx_ptr) {
- ctx_int = (uint64_t)(long)ctx_ptr;
- if (__inode_ctx_set(inode, this, &ctx_int) == 0) {
- LOCK_INIT(&ctx_ptr->lock);
- INIT_LIST_HEAD(&ctx_ptr->aqueue);
- INIT_LIST_HEAD(&ctx_ptr->pqueue);
- } else {
- GF_FREE(ctx_ptr);
- ctx_ptr = NULL;
- }
- }
- }
-
- return ctx_ptr;
-}
-
-jbr_fd_ctx_t *
-jbr_get_fd_ctx(xlator_t *this, fd_t *fd)
-{
- uint64_t ctx_int = 0LL;
- jbr_fd_ctx_t *ctx_ptr;
-
- if (__fd_ctx_get(fd, this, &ctx_int) == 0) {
- ctx_ptr = (jbr_fd_ctx_t *)(long)ctx_int;
- } else {
- ctx_ptr = GF_CALLOC(1, sizeof(*ctx_ptr), gf_mt_jbr_fd_ctx_t);
- if (ctx_ptr) {
- if (__fd_ctx_set(fd, this, (uint64_t)(uintptr_t)ctx_ptr) == 0) {
- INIT_LIST_HEAD(&ctx_ptr->dirty_list);
- INIT_LIST_HEAD(&ctx_ptr->fd_list);
- } else {
- GF_FREE(ctx_ptr);
- ctx_ptr = NULL;
- }
- }
- }
-
- return ctx_ptr;
-}
-
-void
-jbr_mark_fd_dirty(xlator_t *this, jbr_local_t *local)
-{
- fd_t *fd = local->fd;
- jbr_fd_ctx_t *ctx_ptr;
- jbr_dirty_list_t *dirty;
- jbr_private_t *priv = this->private;
-
- /*
- * TBD: don't do any of this for O_SYNC/O_DIRECT writes.
- * Unfortunately, that optimization requires that we distinguish
- * between writev and other "write" calls, saving the original flags
- * and checking them in the callback. Too much work for too little
- * gain right now.
- */
-
- LOCK(&fd->lock);
- ctx_ptr = jbr_get_fd_ctx(this, fd);
- dirty = GF_CALLOC(1, sizeof(*dirty), gf_mt_jbr_dirty_t);
- if (ctx_ptr && dirty) {
- gf_msg_trace(this->name, 0, "marking fd %p as dirty (%p)", fd, dirty);
- /* TBD: fill dirty->id from what changelog gave us */
- list_add_tail(&dirty->links, &ctx_ptr->dirty_list);
- if (list_empty(&ctx_ptr->fd_list)) {
- /* Add a ref so _release doesn't get called. */
- ctx_ptr->fd = fd_ref(fd);
- LOCK(&priv->dirty_lock);
- list_add_tail(&ctx_ptr->fd_list, &priv->dirty_fds);
- UNLOCK(&priv->dirty_lock);
- }
- } else {
- gf_msg(this->name, GF_LOG_ERROR, ENOMEM, J_MSG_MEM_ERR,
- "could not mark %p dirty", fd);
- if (ctx_ptr) {
- GF_FREE(ctx_ptr);
- }
- if (dirty) {
- GF_FREE(dirty);
- }
- }
- UNLOCK(&fd->lock);
-}
-
-#define JBR_TERM_XATTR "trusted.jbr.term"
-#define JBR_INDEX_XATTR "trusted.jbr.index"
-#define JBR_REP_COUNT_XATTR "trusted.jbr.rep-count"
-#define RECON_TERM_XATTR "trusted.jbr.recon-term"
-#define RECON_INDEX_XATTR "trusted.jbr.recon-index"
-
-int32_t
-jbr_leader_checks_and_init(call_frame_t *frame, xlator_t *this, int *op_errno,
- dict_t *xdata, fd_t *fd)
-{
- jbr_local_t *local = NULL;
- jbr_private_t *priv = NULL;
- int32_t ret = -1;
- gf_boolean_t result = _gf_false;
- int from_leader = _gf_false;
- int from_recon = _gf_false;
-
- GF_VALIDATE_OR_GOTO("jbr", this, out);
- priv = this->private;
- GF_VALIDATE_OR_GOTO(this->name, priv, out);
- GF_VALIDATE_OR_GOTO(this->name, op_errno, out);
- GF_VALIDATE_OR_GOTO(this->name, frame, out);
-
- /*
- * Our first goal here is to avoid "split brain surprise" for users who
- * specify exactly 50% with two- or three-way replication. That means
- * either a more-than check against half the total replicas or an
- * at-least check against half of our peers (one less). Of the two,
- * only an at-least check supports the intuitive use of 100% to mean
- * all replicas must be present, because "more than 100%" will never
- * succeed regardless of which count we use. This leaves us with a
- * slightly non-traditional definition of quorum ("at least X% of peers
- * not including ourselves") but one that's useful enough to be worth
- * it.
- *
- * Note that n_children and up_children *do* include the local
- * subvolume, so we need to subtract one in each case.
- */
- if (priv->leader) {
- result = fop_quorum_check(this, (double)(priv->n_children - 1),
- (double)(priv->up_children - 1));
-
- if (result == _gf_false) {
- /* Emulate the AFR client-side-quorum behavior. */
- gf_msg(this->name, GF_LOG_ERROR, EROFS, J_MSG_QUORUM_NOT_MET,
- "Sufficient number of "
- "subvolumes are not up to meet quorum.");
- *op_errno = EROFS;
- goto out;
- }
- } else {
- if (xdata) {
- from_leader = !!dict_get(xdata, JBR_TERM_XATTR);
- from_recon = !!dict_get(xdata, RECON_TERM_XATTR) &&
- !!dict_get(xdata, RECON_INDEX_XATTR);
- } else {
- from_leader = from_recon = _gf_false;
- }
-
- /* follower/recon path *
- * just send it to local node *
- */
- if (!from_leader && !from_recon) {
- *op_errno = EREMOTE;
- goto out;
- }
- }
-
- local = mem_get0(this->local_pool);
- if (!local) {
- goto out;
- }
-
- if (fd)
- local->fd = fd_ref(fd);
- else
- local->fd = NULL;
-
- INIT_LIST_HEAD(&local->qlinks);
- local->successful_acks = 0;
- frame->local = local;
-
- ret = 0;
-out:
- return ret;
-}
-
-int32_t
-jbr_initialize_xdata_set_attrs(xlator_t *this, dict_t **xdata)
-{
- jbr_private_t *priv = NULL;
- int32_t ret = -1;
- uint32_t ti = 0;
-
- GF_VALIDATE_OR_GOTO("jbr", this, out);
- priv = this->private;
- GF_VALIDATE_OR_GOTO(this->name, priv, out);
- GF_VALIDATE_OR_GOTO(this->name, xdata, out);
-
- if (!*xdata) {
- *xdata = dict_new();
- if (!*xdata) {
- gf_msg(this->name, GF_LOG_ERROR, ENOMEM, J_MSG_MEM_ERR,
- "failed to allocate xdata");
- goto out;
- }
- }
-
- if (dict_set_int32(*xdata, JBR_TERM_XATTR, priv->current_term) != 0) {
- gf_msg(this->name, GF_LOG_ERROR, 0, J_MSG_DICT_FLR,
- "failed to set jbr-term");
- goto out;
- }
-
- LOCK(&priv->index_lock);
- ti = ++(priv->index);
- UNLOCK(&priv->index_lock);
- if (dict_set_int32(*xdata, JBR_INDEX_XATTR, ti) != 0) {
- gf_msg(this->name, GF_LOG_ERROR, 0, J_MSG_DICT_FLR,
- "failed to set index");
- goto out;
- }
-
- ret = 0;
-out:
- return ret;
-}
-
-int32_t
-jbr_remove_from_queue(call_frame_t *frame, xlator_t *this)
-{
- int32_t ret = -1;
- jbr_inode_ctx_t *ictx = NULL;
- jbr_local_t *local = NULL;
- jbr_local_t *next = NULL;
-
- GF_VALIDATE_OR_GOTO("jbr", this, out);
- GF_VALIDATE_OR_GOTO(this->name, frame, out);
- local = frame->local;
- GF_VALIDATE_OR_GOTO(this->name, local, out);
-
- if (local->qlinks.next != &local->qlinks) {
- list_del(&local->qlinks);
- ictx = jbr_get_inode_ctx(this, local->fd->inode);
- if (ictx) {
- LOCK(&ictx->lock);
- if (ictx->pending) {
- /*
- * TBD: dequeue *all* non-conflicting
- * reqs
- *
- * With the stub implementation there
- * can only be one request active at a
- * time (zero here) so it's not an
- * issue. In a real implementation
- * there might still be other active
- * requests to check against, and
- * multiple pending requests that could
- * continue.
- */
- gf_msg_debug(this->name, 0, "unblocking next request");
- --(ictx->pending);
- next = list_entry(ictx->pqueue.next, jbr_local_t, qlinks);
- list_del(&next->qlinks);
- list_add_tail(&next->qlinks, &ictx->aqueue);
- call_resume(next->qstub);
- } else {
- --(ictx->active);
- }
- UNLOCK(&ictx->lock);
- }
- }
-
- ret = 0;
-
-out:
- return ret;
-}
-
-int32_t
-jbr_lk_complete(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct gf_flock *flock,
- dict_t *xdata)
-{
- int32_t ret = -1;
- jbr_private_t *priv = NULL;
- jbr_local_t *local = NULL;
- gf_boolean_t result = _gf_false;
-
- GF_VALIDATE_OR_GOTO("jbr", this, err);
- priv = this->private;
- GF_VALIDATE_OR_GOTO(this->name, priv, err);
- GF_VALIDATE_OR_GOTO(this->name, frame, err);
- local = frame->local;
- GF_VALIDATE_OR_GOTO(this->name, local, err);
- GF_VALIDATE_OR_GOTO(this->name, flock, err);
- GF_VALIDATE_OR_GOTO(this->name, xdata, err);
-
- /*
- * Remove from queue for unlock operation only *
- * For lock operation, it will be done in fan-in *
- */
- if (flock->l_type == F_UNLCK) {
- ret = jbr_remove_from_queue(frame, this);
- if (ret)
- goto err;
- }
-
- /*
- * On a follower, unwind with the op_ret and op_errno. On a *
- * leader, if the fop is a locking fop, and its a failure, *
- * send fail, else call stub which will dispatch the fop to *
- * the followers. *
- * *
- * If the fop is a unlocking fop, check quorum. If quorum *
- * is met, then send success. Else Rollback on leader, *
- * followed by followers, and then send -ve ack to client. *
- */
- if (priv->leader) {
- /* Increase the successful acks if it's a success. */
- LOCK(&frame->lock);
- if (op_ret != -1)
- (local->successful_acks)++;
- UNLOCK(&frame->lock);
-
- if (flock->l_type == F_UNLCK) {
- result = fop_quorum_check(this, (double)priv->n_children,
- (double)local->successful_acks);
- if (result == _gf_false) {
- op_ret = -1;
- op_errno = EROFS;
- gf_msg(this->name, GF_LOG_ERROR, EROFS, J_MSG_QUORUM_NOT_MET,
- "Quorum is not met. "
- "The operation has failed.");
-
- /* TODO: PERFORM UNLOCK ROLLBACK ON LEADER *
- * FOLLOWED BY FOLLOWERS. */
- } else {
- op_ret = 0;
- op_errno = 0;
- }
-
- fd_unref(local->fd);
- STACK_UNWIND_STRICT(lk, frame, op_ret, op_errno, flock, xdata);
- } else {
- if (op_ret == -1) {
- gf_msg(this->name, GF_LOG_ERROR, 0, J_MSG_LOCK_FAILURE,
- "The lock operation failed on "
- "the leader.");
-
- fd_unref(local->fd);
- STACK_UNWIND_STRICT(lk, frame, op_ret, op_errno, flock, xdata);
- } else {
- if (!local->stub) {
- goto err;
- }
-
- call_resume(local->stub);
- }
- }
- } else {
- fd_unref(local->fd);
- STACK_UNWIND_STRICT(lk, frame, op_ret, op_errno, flock, xdata);
- }
-
- return 0;
-
-err:
- if (local) {
- if (local->stub) {
- call_stub_destroy(local->stub);
- }
- if (local->qstub) {
- call_stub_destroy(local->qstub);
- }
- if (local->fd) {
- fd_unref(local->fd);
- }
- mem_put(local);
- }
- STACK_UNWIND_STRICT(lk, frame, -1, op_errno, flock, xdata);
- return 0;
-}
-
-int32_t
-jbr_lk_fan_in(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
- int32_t op_errno, struct gf_flock *flock, dict_t *xdata)
-{
- uint8_t call_count = -1;
- int32_t ret = -1;
- gf_boolean_t result = _gf_false;
- jbr_local_t *local = NULL;
- jbr_private_t *priv = NULL;
-
- GF_VALIDATE_OR_GOTO("jbr", this, out);
- GF_VALIDATE_OR_GOTO(this->name, frame, out);
- priv = this->private;
- local = frame->local;
- GF_VALIDATE_OR_GOTO(this->name, priv, out);
- GF_VALIDATE_OR_GOTO(this->name, local, out);
-
- gf_msg_trace(this->name, 0, "op_ret = %d, op_errno = %d\n", op_ret,
- op_errno);
-
- LOCK(&frame->lock);
- call_count = --(local->call_count);
- if (op_ret != -1) {
- /* Increment the number of successful acks *
- * received for the operation. *
- */
- (local->successful_acks)++;
- local->successful_op_ret = op_ret;
- }
- gf_msg_debug(this->name, 0, "succ_acks = %d, op_ret = %d, op_errno = %d\n",
- op_ret, op_errno, local->successful_acks);
- UNLOCK(&frame->lock);
-
- if (call_count == 0) {
- /*
- * If the fop is a locking fop, then check quorum. If quorum *
- * is met, send successful ack to the client. If quorum is *
- * not met, then rollback locking on followers, followed by *
- * rollback of locking on leader, and then sending -ve ack *
- * to the client. *
- * *
- * If the fop is a unlocking fop, then call stub. *
- */
- if (flock->l_type == F_UNLCK) {
- call_resume(local->stub);
- } else {
- /*
- * Remove from queue for locking fops, for unlocking *
- * fops, it is taken care of in jbr_lk_complete *
- */
- ret = jbr_remove_from_queue(frame, this);
- if (ret)
- goto out;
-
- fd_unref(local->fd);
-
- result = fop_quorum_check(this, (double)priv->n_children,
- (double)local->successful_acks);
- if (result == _gf_false) {
- gf_msg(this->name, GF_LOG_ERROR, EROFS, J_MSG_QUORUM_NOT_MET,
- "Didn't receive enough acks to meet "
- "quorum. Failing the locking "
- "operation and initiating rollback on "
- "followers and the leader "
- "respectively.");
-
- /* TODO: PERFORM ROLLBACK OF LOCKING ON
- * FOLLOWERS, FOLLOWED BY ROLLBACK ON
- * LEADER.
- */
-
- STACK_UNWIND_STRICT(lk, frame, -1, EROFS, flock, xdata);
- } else {
- STACK_UNWIND_STRICT(lk, frame, 0, 0, flock, xdata);
- }
- }
- }
-
- ret = 0;
-out:
- return ret;
-}
-
-/*
- * Called from leader for locking fop, being written as a separate
- * function so as to support queues.
- */
-int32_t
-jbr_perform_lk_on_leader(call_frame_t *frame, xlator_t *this, fd_t *fd,
- int32_t cmd, struct gf_flock *flock, dict_t *xdata)
-{
- int32_t ret = -1;
-
- GF_VALIDATE_OR_GOTO("jbr", this, out);
- GF_VALIDATE_OR_GOTO(this->name, frame, out);
- GF_VALIDATE_OR_GOTO(this->name, flock, out);
- GF_VALIDATE_OR_GOTO(this->name, fd, out);
-
- STACK_WIND(frame, jbr_lk_complete, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->lk, fd, cmd, flock, xdata);
-
- ret = 0;
-out:
- return ret;
-}
-
-int32_t
-jbr_lk_perform_local_op(call_frame_t *frame, xlator_t *this, int *op_errno,
- fd_t *fd, int32_t cmd, struct gf_flock *flock,
- dict_t *xdata)
-{
- int32_t ret = -1;
- jbr_local_t *local = NULL;
-
- GF_VALIDATE_OR_GOTO("jbr", this, out);
- GF_VALIDATE_OR_GOTO(this->name, frame, out);
- local = frame->local;
- GF_VALIDATE_OR_GOTO(this->name, local, out);
- GF_VALIDATE_OR_GOTO(this->name, fd, out);
- GF_VALIDATE_OR_GOTO(this->name, op_errno, out);
- GF_VALIDATE_OR_GOTO(this->name, flock, out);
-
- /*
- * Check if the fop is a locking fop or unlocking fop, and
- * handle it accordingly. If it is a locking fop, take the
- * lock on leader first, and then send it to the followers.
- * If it is a unlocking fop, unlock the followers first,
- * and then on meeting quorum perform the unlock on the leader.
- */
- if (flock->l_type == F_UNLCK) {
- ret = jbr_lk_call_dispatch(frame, this, op_errno, fd, cmd, flock,
- xdata);
- if (ret)
- goto out;
- } else {
- jbr_inode_ctx_t *ictx = jbr_get_inode_ctx(this, fd->inode);
-
- if (!ictx) {
- *op_errno = EIO;
- goto out;
- }
-
- LOCK(&ictx->lock);
- if (ictx->active) {
- gf_msg_debug(this->name, 0, "queuing request due to conflict");
-
- local->qstub = fop_lk_stub(frame, jbr_perform_lk_on_leader, fd, cmd,
- flock, xdata);
- if (!local->qstub) {
- UNLOCK(&ictx->lock);
- goto out;
- }
- list_add_tail(&local->qlinks, &ictx->pqueue);
- ++(ictx->pending);
- UNLOCK(&ictx->lock);
- ret = 0;
- goto out;
- } else {
- list_add_tail(&local->qlinks, &ictx->aqueue);
- ++(ictx->active);
- }
- UNLOCK(&ictx->lock);
- ret = jbr_perform_lk_on_leader(frame, this, fd, cmd, flock, xdata);
- if (ret == -1)
- goto out;
- }
-
- ret = 0;
-out:
- return ret;
-}
-
-int32_t
-jbr_lk_continue(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd,
- struct gf_flock *flock, dict_t *xdata)
-{
- int32_t ret = -1;
- jbr_local_t *local = NULL;
- jbr_private_t *priv = NULL;
-
- GF_VALIDATE_OR_GOTO("jbr", this, out);
- GF_VALIDATE_OR_GOTO(this->name, frame, out);
- priv = this->private;
- local = frame->local;
- GF_VALIDATE_OR_GOTO(this->name, priv, out);
- GF_VALIDATE_OR_GOTO(this->name, local, out);
- GF_VALIDATE_OR_GOTO(this->name, flock, out);
- GF_VALIDATE_OR_GOTO(this->name, fd, out);
- GF_VALIDATE_OR_GOTO(this->name, xdata, out);
-
- /*
- * If it's a locking fop, then call dispatch to followers *
- * If it's a unlock fop, then perform the unlock operation *
- */
- if (flock->l_type == F_UNLCK) {
- STACK_WIND(frame, jbr_lk_complete, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->lk, fd, cmd, flock, xdata);
- } else {
- /*
- * Directly call jbr_lk_dispatch instead of appending *
- * in queue, which is done at jbr_lk_perform_local_op *
- * for locking fops *
- */
- ret = jbr_lk_dispatch(frame, this, fd, cmd, flock, xdata);
- if (ret) {
- STACK_UNWIND_STRICT(lk, frame, -1, 0, flock, xdata);
- goto out;
- }
- }
-
- ret = 0;
-out:
- return ret;
-}
-
-uint8_t
-jbr_count_up_kids(jbr_private_t *priv)
-{
- uint8_t retval = 0;
- uint8_t i;
-
- for (i = 0; i < priv->n_children; ++i) {
- if (priv->kid_state & (1 << i)) {
- ++retval;
- }
- }
-
- return retval;
-}
-
-/*
- * The fsync machinery looks a lot like that for any write call, but there are
- * some important differences that are easy to miss. First, we don't care
- * about the xdata that shows whether the call came from a leader or
- * reconciliation process. If we're the leader we fan out; if we're not we
- * don't. Second, we don't wait for followers before we issue the local call.
- * The code generation system could be updated to handle this, and still might
- * if we need to implement other "almost identical" paths (e.g. for open), but
- * a copy is more readable as long as it's just one.
- */
-
-int32_t
-jbr_fsync_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
- int32_t op_errno, struct iatt *prebuf, struct iatt *postbuf,
- dict_t *xdata)
-{
- jbr_local_t *local = frame->local;
- gf_boolean_t unwind;
-
- LOCK(&frame->lock);
- unwind = !--(local->call_count);
- UNLOCK(&frame->lock);
-
- if (unwind) {
- STACK_UNWIND_STRICT(fsync, frame, op_ret, op_errno, prebuf, postbuf,
- xdata);
- }
- return 0;
-}
-
-int32_t
-jbr_fsync_local_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf, dict_t *xdata)
-{
- jbr_dirty_list_t *dirty;
- jbr_dirty_list_t *dtmp;
- jbr_local_t *local = frame->local;
-
- list_for_each_entry_safe(dirty, dtmp, &local->qlinks, links)
- {
- gf_msg_trace(this->name, 0, "sending post-op on %p (%p)", local->fd,
- dirty);
- GF_FREE(dirty);
- }
-
- return jbr_fsync_cbk(frame, cookie, this, op_ret, op_errno, prebuf, postbuf,
- xdata);
-}
-
-int32_t
-jbr_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags,
- dict_t *xdata)
-{
- jbr_private_t *priv = this->private;
- jbr_local_t *local;
- uint64_t ctx_int = 0LL;
- jbr_fd_ctx_t *ctx_ptr;
- xlator_list_t *trav;
-
- local = mem_get0(this->local_pool);
- if (!local) {
- STACK_UNWIND_STRICT(fsync, frame, -1, ENOMEM, NULL, NULL, xdata);
- return 0;
- }
- INIT_LIST_HEAD(&local->qlinks);
- frame->local = local;
-
- /* Move the dirty list from the fd to the fsync request. */
- LOCK(&fd->lock);
- if (__fd_ctx_get(fd, this, &ctx_int) == 0) {
- ctx_ptr = (jbr_fd_ctx_t *)(long)ctx_int;
- list_splice_init(&ctx_ptr->dirty_list, &local->qlinks);
- }
- UNLOCK(&fd->lock);
-
- /* Issue the local call. */
- local->call_count = priv->leader ? priv->n_children : 1;
- STACK_WIND(frame, jbr_fsync_local_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fsync, fd, flags, xdata);
-
- /* Issue remote calls if we're the leader. */
- if (priv->leader) {
- for (trav = this->children->next; trav; trav = trav->next) {
- STACK_WIND(frame, jbr_fsync_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fsync, fd, flags, xdata);
- }
- }
-
- return 0;
-}
-
-int32_t
-jbr_getxattr_special(call_frame_t *frame, xlator_t *this, loc_t *loc,
- const char *name, dict_t *xdata)
-{
- dict_t *result;
- jbr_private_t *priv = this->private;
-
- if (!priv->leader) {
- STACK_UNWIND_STRICT(getxattr, frame, -1, EREMOTE, NULL, NULL);
- return 0;
- }
-
- if (!name || (strcmp(name, JBR_REP_COUNT_XATTR) != 0)) {
- STACK_WIND_TAIL(frame, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->getxattr, loc, name, xdata);
- return 0;
- }
-
- result = dict_new();
- if (!result) {
- goto dn_failed;
- }
-
- priv->up_children = jbr_count_up_kids(this->private);
- if (dict_set_uint32(result, JBR_REP_COUNT_XATTR, priv->up_children) != 0) {
- goto dsu_failed;
- }
-
- STACK_UNWIND_STRICT(getxattr, frame, 0, 0, result, NULL);
- dict_unref(result);
- return 0;
-
-dsu_failed:
- dict_unref(result);
-dn_failed:
- STACK_UNWIND_STRICT(getxattr, frame, -1, ENOMEM, NULL, NULL);
- return 0;
-}
-
-void
-jbr_flush_fd(xlator_t *this, jbr_fd_ctx_t *fd_ctx)
-{
- jbr_dirty_list_t *dirty;
- jbr_dirty_list_t *dtmp;
-
- list_for_each_entry_safe(dirty, dtmp, &fd_ctx->dirty_list, links)
- {
- gf_msg_trace(this->name, 0, "sending post-op on %p (%p)", fd_ctx->fd,
- dirty);
- GF_FREE(dirty);
- }
-
- INIT_LIST_HEAD(&fd_ctx->dirty_list);
-}
-
-void *
-jbr_flush_thread(void *ctx)
-{
- xlator_t *this = ctx;
- jbr_private_t *priv = this->private;
- struct list_head dirty_fds;
- jbr_fd_ctx_t *fd_ctx;
- jbr_fd_ctx_t *fd_tmp;
- int ret;
-
- for (;;) {
- /*
- * We have to be very careful to avoid lock inversions here, so
- * we can't just hold priv->dirty_lock while we take and
- * release locks for each fd. Instead, we only hold dirty_lock
- * at the beginning of each iteration, as we (effectively) make
- * a copy of the current list head and then clear the original.
- * This leads to four scenarios for adding the first entry to
- * an fd and potentially putting it on the global list.
- *
- * (1) While we're asleep. No lock contention, it just gets
- * added and will be processed on the next iteration.
- *
- * (2) After we've made a local copy, but before we've started
- * processing that fd. The new entry will be added to the
- * fd (under its lock), and we'll process it on the current
- * iteration.
- *
- * (3) While we're processing the fd. They'll block on the fd
- * lock, then see that the list is empty and put it on the
- * global list. We'll process it here on the next
- * iteration.
- *
- * (4) While we're working, but after we've processed that fd.
- * Same as (1) as far as that fd is concerned.
- */
- INIT_LIST_HEAD(&dirty_fds);
- LOCK(&priv->dirty_lock);
- list_splice_init(&priv->dirty_fds, &dirty_fds);
- UNLOCK(&priv->dirty_lock);
-
- list_for_each_entry_safe(fd_ctx, fd_tmp, &dirty_fds, fd_list)
- {
- ret = syncop_fsync(FIRST_CHILD(this), fd_ctx->fd, 0, NULL, NULL,
- NULL, NULL);
- if (ret) {
- gf_msg(this->name, GF_LOG_WARNING, 0, J_MSG_SYS_CALL_FAILURE,
- "failed to fsync %p (%d)", fd_ctx->fd, -ret);
- }
-
- LOCK(&fd_ctx->fd->lock);
- jbr_flush_fd(this, fd_ctx);
- list_del_init(&fd_ctx->fd_list);
- UNLOCK(&fd_ctx->fd->lock);
- fd_unref(fd_ctx->fd);
- }
-
- sleep(JBR_FLUSH_INTERVAL);
- }
-
- return NULL;
-}
-
-int32_t
-jbr_get_changelog_dir(xlator_t *this, char **cl_dir_p)
-{
- xlator_t *cl_xl;
-
- /* Find our changelog translator. */
- cl_xl = this;
- while (cl_xl) {
- if (strcmp(cl_xl->type, "features/changelog") == 0) {
- break;
- }
- cl_xl = cl_xl->children->xlator;
- }
- if (!cl_xl) {
- gf_msg(this->name, GF_LOG_ERROR, 0, J_MSG_INIT_FAIL,
- "failed to find changelog translator");
- return ENOENT;
- }
-
- /* Find the actual changelog directory. */
- if (dict_get_str(cl_xl->options, "changelog-dir", cl_dir_p) != 0) {
- gf_msg(this->name, GF_LOG_ERROR, 0, J_MSG_INIT_FAIL,
- "failed to find changelog-dir for %s", cl_xl->name);
- return ENODATA;
- }
-
- return 0;
-}
-
-void
-jbr_get_terms(call_frame_t *frame, xlator_t *this)
-{
- int32_t op_errno = 0;
- char *cl_dir = NULL;
- int32_t term_first = -1;
- int32_t term_contig = -1;
- int32_t term_last = -1;
- int term_num = 0;
- char *probe_str = NULL;
- dict_t *my_xdata = NULL;
- DIR *fp = NULL;
- struct dirent *entry = NULL;
- struct dirent scratch[2] = {
- {
- 0,
- },
- };
-
- op_errno = jbr_get_changelog_dir(this, &cl_dir);
- if (op_errno) {
- goto err; /* Error was already logged. */
- }
- op_errno = ENODATA; /* Most common error after this. */
-
- fp = sys_opendir(cl_dir);
- if (!fp) {
- op_errno = errno;
- goto err;
- }
-
- /* Find first and last terms. */
- for (;;) {
- errno = 0;
- entry = sys_readdir(fp, scratch);
- if (!entry || errno != 0) {
- if (errno != 0) {
- op_errno = errno;
- goto err;
- }
- break;
- }
-
- if (fnmatch("TERM.*", entry->d_name, FNM_PATHNAME) != 0) {
- continue;
- }
- /* +5 points to the character after the period */
- term_num = atoi(entry->d_name + 5);
- gf_msg(this->name, GF_LOG_INFO, 0, J_MSG_GENERIC, "%s => %d",
- entry->d_name, term_num);
- if (term_num < 0) {
- gf_msg(this->name, GF_LOG_ERROR, 0, J_MSG_INVALID,
- "invalid term file name %s", entry->d_name);
- op_errno = EINVAL;
- goto err;
- }
- if ((term_first < 0) || (term_first > term_num)) {
- term_first = term_num;
- }
- if ((term_last < 0) || (term_last < term_num)) {
- term_last = term_num;
- }
- }
- if ((term_first < 0) || (term_last < 0)) {
- /* TBD: are we *sure* there should always be at least one? */
- gf_msg(this->name, GF_LOG_ERROR, 0, J_MSG_NO_DATA, "no terms found");
- op_errno = EINVAL;
- goto err;
- }
-
- (void)sys_closedir(fp);
- fp = NULL;
-
- /*
- * Find term_contig, which is the earliest term for which there are
- * no gaps between it and term_last.
- */
- for (term_contig = term_last; term_contig > 0; --term_contig) {
- if (gf_asprintf(&probe_str, "%s/TERM.%d", cl_dir, term_contig - 1) <=
- 0) {
- gf_msg(this->name, GF_LOG_ERROR, 0, J_MSG_MEM_ERR,
- "failed to format term %d", term_contig - 1);
- goto err;
- }
- if (sys_access(probe_str, F_OK) != 0) {
- GF_FREE(probe_str);
- probe_str = NULL;
- break;
- }
- GF_FREE(probe_str);
- probe_str = NULL;
- }
-
- gf_msg(this->name, GF_LOG_INFO, 0, J_MSG_GENERIC, "found terms %d-%d (%d)",
- term_first, term_last, term_contig);
-
- /* Return what we've found */
- my_xdata = dict_new();
- if (!my_xdata) {
- gf_msg(this->name, GF_LOG_ERROR, 0, J_MSG_MEM_ERR,
- "failed to allocate reply dictionary");
- goto err;
- }
- if (dict_set_int32(my_xdata, "term-first", term_first) != 0) {
- gf_msg(this->name, GF_LOG_ERROR, 0, J_MSG_DICT_FLR,
- "failed to set term-first");
- goto err;
- }
- if (dict_set_int32(my_xdata, "term-contig", term_contig) != 0) {
- gf_msg(this->name, GF_LOG_ERROR, 0, J_MSG_DICT_FLR,
- "failed to set term-contig");
- goto err;
- }
- if (dict_set_int32(my_xdata, "term-last", term_last) != 0) {
- gf_msg(this->name, GF_LOG_ERROR, 0, J_MSG_DICT_FLR,
- "failed to set term-last");
- goto err;
- }
-
- /* Finally! */
- STACK_UNWIND_STRICT(ipc, frame, 0, 0, my_xdata);
- dict_unref(my_xdata);
- return;
-
-err:
- if (fp) {
- (void)sys_closedir(fp);
- }
- if (my_xdata) {
- dict_unref(my_xdata);
- }
-
- if (probe_str)
- GF_FREE(probe_str);
-
- STACK_UNWIND_STRICT(ipc, frame, -1, op_errno, NULL);
-}
-
-long
-get_entry_count(xlator_t *this, int fd)
-{
- struct stat buf;
- long min; /* last entry not known to be empty */
- long max; /* first entry known to be empty */
- long curr;
- char entry[CHANGELOG_ENTRY_SIZE];
-
- if (sys_fstat(fd, &buf) < 0) {
- return -1;
- }
-
- min = 0;
- max = buf.st_size / CHANGELOG_ENTRY_SIZE;
-
- while ((min + 1) < max) {
- curr = (min + max) / 2;
- if (sys_lseek(fd, curr * CHANGELOG_ENTRY_SIZE, SEEK_SET) < 0) {
- return -1;
- }
- if (sys_read(fd, entry, sizeof(entry)) != sizeof(entry)) {
- return -1;
- }
- if ((entry[0] == '_') && (entry[1] == 'P')) {
- min = curr;
- } else {
- max = curr;
- }
- }
-
- if (sys_lseek(fd, 0, SEEK_SET) < 0) {
- gf_msg(this->name, GF_LOG_WARNING, 0, J_MSG_SYS_CALL_FAILURE,
- "failed to reset offset");
- }
- return max;
-}
-
-void
-jbr_open_term(call_frame_t *frame, xlator_t *this, dict_t *xdata)
-{
- int32_t op_errno;
- char *cl_dir;
- char *term;
- char *path = NULL;
- jbr_private_t *priv = this->private;
-
- op_errno = jbr_get_changelog_dir(this, &cl_dir);
- if (op_errno) {
- goto err;
- }
-
- if (dict_get_str(xdata, "term", &term) != 0) {
- gf_msg(this->name, GF_LOG_ERROR, 0, J_MSG_NO_DATA, "missing term");
- op_errno = ENODATA;
- goto err;
- }
-
- if (gf_asprintf(&path, "%s/TERM.%s", cl_dir, term) < 0) {
- gf_msg(this->name, GF_LOG_ERROR, 0, J_MSG_MEM_ERR,
- "failed to construct path");
- op_errno = ENOMEM;
- goto err;
- }
-
- if (priv->term_fd >= 0) {
- sys_close(priv->term_fd);
- }
- priv->term_fd = open(path, O_RDONLY);
- if (priv->term_fd < 0) {
- op_errno = errno;
- gf_msg(this->name, GF_LOG_ERROR, 0, J_MSG_SYS_CALL_FAILURE,
- "failed to open term file");
- goto err;
- }
-
- priv->term_total = get_entry_count(this, priv->term_fd);
- if (priv->term_total < 0) {
- gf_msg(this->name, GF_LOG_ERROR, 0, J_MSG_NO_DATA,
- "failed to get entry count");
- sys_close(priv->term_fd);
- priv->term_fd = -1;
- op_errno = EIO;
- goto err;
- }
- priv->term_read = 0;
-
- /* Success! */
- STACK_UNWIND_STRICT(ipc, frame, 0, 0, NULL);
- GF_FREE(path);
- return;
-
-err:
- STACK_UNWIND_STRICT(ipc, frame, -1, op_errno, NULL);
- GF_FREE(path);
-}
-
-void
-jbr_next_entry(call_frame_t *frame, xlator_t *this)
-{
- int32_t op_errno = ENOMEM;
- jbr_private_t *priv = this->private;
- ssize_t nbytes;
- dict_t *my_xdata;
-
- if (priv->term_fd < 0) {
- op_errno = EBADFD;
- goto err;
- }
-
- if (priv->term_read >= priv->term_total) {
- op_errno = ENODATA;
- goto err;
- }
-
- nbytes = sys_read(priv->term_fd, priv->term_buf, CHANGELOG_ENTRY_SIZE);
- if (nbytes < CHANGELOG_ENTRY_SIZE) {
- if (nbytes < 0) {
- op_errno = errno;
- gf_msg(this->name, GF_LOG_ERROR, 0, J_MSG_SYS_CALL_FAILURE,
- "error reading next entry: %s", strerror(errno));
- } else {
- op_errno = EIO;
- gf_msg(this->name, GF_LOG_ERROR, 0, J_MSG_SYS_CALL_FAILURE,
- "got %zd/%d bytes for next entry", nbytes,
- CHANGELOG_ENTRY_SIZE);
- }
- goto err;
- }
- ++(priv->term_read);
-
- my_xdata = dict_new();
- if (!my_xdata) {
- gf_msg(this->name, GF_LOG_ERROR, 0, J_MSG_MEM_ERR,
- "failed to allocate reply xdata");
- goto err;
- }
-
- if (dict_set_static_bin(my_xdata, "data", priv->term_buf,
- CHANGELOG_ENTRY_SIZE) != 0) {
- gf_msg(this->name, GF_LOG_ERROR, 0, J_MSG_DICT_FLR,
- "failed to assign reply xdata");
- goto err;
- }
-
- STACK_UNWIND_STRICT(ipc, frame, 0, 0, my_xdata);
- dict_unref(my_xdata);
- return;
-
-err:
- STACK_UNWIND_STRICT(ipc, frame, -1, op_errno, NULL);
-}
-
-int32_t
-jbr_ipc_fan_in(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
-{
- jbr_local_t *local = NULL;
- int32_t ret = -1;
- uint8_t call_count;
-
- GF_VALIDATE_OR_GOTO("jbr", this, out);
- GF_VALIDATE_OR_GOTO(this->name, frame, out);
- local = frame->local;
- GF_VALIDATE_OR_GOTO(this->name, local, out);
-
- gf_msg_trace(this->name, 0, "op_ret = %d, op_errno = %d\n", op_ret,
- op_errno);
-
- LOCK(&frame->lock);
- call_count = --(local->call_count);
- UNLOCK(&frame->lock);
-
- if (call_count == 0) {
-#if defined(JBR_CG_QUEUE)
- ret = jbr_remove_from_queue(frame, this);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, J_MSG_GENERIC,
- "Failed to remove from queue.");
- }
-#endif
- /*
- * Unrefing the reference taken in continue() or complete() *
- */
- dict_unref(local->xdata);
- STACK_DESTROY(frame->root);
- }
-
- ret = 0;
-out:
- return ret;
-}
-
-int32_t
-jbr_ipc_complete(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
-{
- jbr_local_t *local = NULL;
-
- GF_VALIDATE_OR_GOTO("jbr", this, out);
- GF_VALIDATE_OR_GOTO(this->name, frame, out);
- local = frame->local;
- GF_VALIDATE_OR_GOTO(this->name, local, out);
-
- jbr_ipc_call_dispatch(frame, this, &op_errno, FDL_IPC_JBR_SERVER_ROLLBACK,
- local->xdata);
-out:
- return 0;
-}
-
-int32_t
-jbr_ipc(call_frame_t *frame, xlator_t *this, int32_t op, dict_t *xdata)
-{
- switch (op) {
- case JBR_SERVER_TERM_RANGE:
- jbr_get_terms(frame, this);
- break;
- case JBR_SERVER_OPEN_TERM:
- jbr_open_term(frame, this, xdata);
- break;
- case JBR_SERVER_NEXT_ENTRY:
- jbr_next_entry(frame, this);
- break;
- case FDL_IPC_JBR_SERVER_ROLLBACK:
- /*
- * Just send the fop down to fdl. Need not *
- * dispatch it to other bricks in the sub- *
- * volume, as it will be done where the op *
- * has failed. *
- */
- default:
- STACK_WIND_TAIL(frame, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->ipc, op, xdata);
- }
-
- return 0;
-}
-
-#pragma generate
-
-int32_t
-jbr_forget(xlator_t *this, inode_t *inode)
-{
- uint64_t ctx = 0LL;
-
- if ((inode_ctx_del(inode, this, &ctx) == 0) && ctx) {
- GF_FREE((void *)(long)ctx);
- }
-
- return 0;
-}
-
-int32_t
-jbr_release(xlator_t *this, fd_t *fd)
-{
- uint64_t ctx = 0LL;
-
- if ((fd_ctx_del(fd, this, &ctx) == 0) && ctx) {
- GF_FREE((void *)(long)ctx);
- }
-
- return 0;
-}
-
-struct xlator_cbks cbks = {
- .forget = jbr_forget,
- .release = jbr_release,
-};
-
-int
-jbr_reconfigure(xlator_t *this, dict_t *options)
-{
- jbr_private_t *priv = this->private;
-
- GF_OPTION_RECONF("leader", priv->config_leader, options, bool, err);
- GF_OPTION_RECONF("quorum-percent", priv->quorum_pct, options, percent, err);
- gf_msg(this->name, GF_LOG_INFO, 0, J_MSG_GENERIC,
- "reconfigure called, config_leader = %d, quorum_pct = %.1f\n",
- priv->leader, priv->quorum_pct);
-
- priv->leader = priv->config_leader;
-
- return 0;
-
-err:
- return -1;
-}
-
-int
-jbr_get_child_index(xlator_t *this, xlator_t *kid)
-{
- xlator_list_t *trav;
- int retval = -1;
-
- for (trav = this->children; trav; trav = trav->next) {
- ++retval;
- if (trav->xlator == kid) {
- return retval;
- }
- }
-
- return -1;
-}
-
-/*
- * Child notify handling is unreasonably FUBAR. Sometimes we'll get a
- * CHILD_DOWN for a protocol/client child before we ever got a CHILD_UP for it.
- * Other times we won't. Because it's effectively random (probably racy), we
- * can't just maintain a count. We actually have to keep track of the state
- * for each child separately, to filter out the bogus CHILD_DOWN events, and
- * then generate counts on demand.
- */
-int
-jbr_notify(xlator_t *this, int event, void *data, ...)
-{
- jbr_private_t *priv = this->private;
- int index = -1;
- int ret = -1;
- gf_boolean_t result = _gf_false;
- gf_boolean_t relevant = _gf_false;
-
- switch (event) {
- case GF_EVENT_CHILD_UP:
- index = jbr_get_child_index(this, data);
- if (index >= 0) {
- /* Check if the child was previously down
- * and it's not a false CHILD_UP
- */
- if (!(priv->kid_state & (1 << index))) {
- relevant = _gf_true;
- }
-
- priv->kid_state |= (1 << index);
- priv->up_children = jbr_count_up_kids(priv);
- gf_msg(this->name, GF_LOG_INFO, 0, J_MSG_GENERIC,
- "got CHILD_UP for %s, now %u kids",
- ((xlator_t *)data)->name, priv->up_children);
- if (!priv->config_leader && (priv->up_children > 1)) {
- priv->leader = _gf_false;
- }
-
- /* If it's not relevant, or we have already *
- * sent CHILD_UP just break */
- if (!relevant || priv->child_up)
- break;
-
- /* If it's not a leader, just send the notify up */
- if (!priv->leader) {
- ret = default_notify(this, event, data);
- if (!ret)
- priv->child_up = _gf_true;
- break;
- }
-
- result = fop_quorum_check(this, (double)(priv->n_children - 1),
- (double)(priv->up_children - 1));
- if (result == _gf_false) {
- gf_msg(this->name, GF_LOG_INFO, 0, J_MSG_GENERIC,
- "Not enough children "
- "are up to meet quorum. Waiting to "
- "send CHILD_UP from leader");
- } else {
- gf_msg(this->name, GF_LOG_INFO, 0, J_MSG_GENERIC,
- "Enough children are up "
- "to meet quorum. Sending CHILD_UP "
- "from leader");
- ret = default_notify(this, event, data);
- if (!ret)
- priv->child_up = _gf_true;
- }
- }
- break;
- case GF_EVENT_CHILD_DOWN:
- index = jbr_get_child_index(this, data);
- if (index >= 0) {
- /* Check if the child was previously up
- * and it's not a false CHILD_DOWN
- */
- if (priv->kid_state & (1 << index)) {
- relevant = _gf_true;
- }
- priv->kid_state &= ~(1 << index);
- priv->up_children = jbr_count_up_kids(priv);
- gf_msg(this->name, GF_LOG_INFO, 0, J_MSG_GENERIC,
- "got CHILD_DOWN for %s, now %u kids",
- ((xlator_t *)data)->name, priv->up_children);
- if (!priv->config_leader && (priv->up_children < 2) &&
- relevant) {
- priv->leader = _gf_true;
- }
-
- /* If it's not relevant, or we have already *
- * sent CHILD_DOWN just break */
- if (!relevant || !priv->child_up)
- break;
-
- /* If it's not a leader, just break coz we shouldn't *
- * propagate the failure from the failure till it *
- * itself goes down *
- */
- if (!priv->leader) {
- break;
- }
-
- result = fop_quorum_check(this, (double)(priv->n_children - 1),
- (double)(priv->up_children - 1));
- if (result == _gf_false) {
- gf_msg(this->name, GF_LOG_INFO, 0, J_MSG_GENERIC,
- "Enough children are "
- "to down to fail quorum. "
- "Sending CHILD_DOWN from leader");
- ret = default_notify(this, event, data);
- if (!ret)
- priv->child_up = _gf_false;
- } else {
- gf_msg(this->name, GF_LOG_INFO, 0, J_MSG_GENERIC,
- "Not enough children "
- "are down to fail quorum. Waiting to "
- "send CHILD_DOWN from leader");
- }
- }
- break;
- default:
- ret = default_notify(this, event, data);
- }
-
- return ret;
-}
-
-int32_t
-mem_acct_init(xlator_t *this)
-{
- int ret = -1;
-
- GF_VALIDATE_OR_GOTO("jbr", this, out);
-
- ret = xlator_mem_acct_init(this, gf_mt_jbr_end + 1);
-
- if (ret != 0) {
- gf_msg(this->name, GF_LOG_ERROR, 0, J_MSG_MEM_ERR,
- "Memory accounting init"
- "failed");
- return ret;
- }
-out:
- return ret;
-}
-
-void
-jbr_deallocate_priv(jbr_private_t *priv)
-{
- if (!priv) {
- return;
- }
-
- GF_FREE(priv);
-}
-
-int32_t
-jbr_init(xlator_t *this)
-{
- xlator_list_t *remote;
- xlator_list_t *local;
- jbr_private_t *priv = NULL;
- xlator_list_t *trav;
- pthread_t kid;
- extern xlator_t global_xlator;
- glusterfs_ctx_t *oldctx = global_xlator.ctx;
-
- /*
- * Any fop that gets special treatment has to be patched in here,
- * because the compiled-in table is produced by the code generator and
- * only contains generated functions. Note that we have to go through
- * this->fops because of some dynamic-linking strangeness; modifying
- * the static table doesn't work.
- */
- this->fops->getxattr = jbr_getxattr_special;
- this->fops->fsync = jbr_fsync;
-
- local = this->children;
- if (!local) {
- gf_msg(this->name, GF_LOG_ERROR, 0, J_MSG_NO_DATA,
- "no local subvolume");
- goto err;
- }
-
- remote = local->next;
- if (!remote) {
- gf_msg(this->name, GF_LOG_ERROR, 0, J_MSG_NO_DATA,
- "no remote subvolumes");
- goto err;
- }
-
- this->local_pool = mem_pool_new(jbr_local_t, 128);
- if (!this->local_pool) {
- gf_msg(this->name, GF_LOG_ERROR, 0, J_MSG_MEM_ERR,
- "failed to create jbr_local_t pool");
- goto err;
- }
-
- priv = GF_CALLOC(1, sizeof(*priv), gf_mt_jbr_private_t);
- if (!priv) {
- gf_msg(this->name, GF_LOG_ERROR, 0, J_MSG_MEM_ERR,
- "could not allocate priv");
- goto err;
- }
-
- for (trav = this->children; trav; trav = trav->next) {
- ++(priv->n_children);
- }
-
- LOCK_INIT(&priv->dirty_lock);
- LOCK_INIT(&priv->index_lock);
- INIT_LIST_HEAD(&priv->dirty_fds);
- priv->term_fd = -1;
-
- this->private = priv;
-
- GF_OPTION_INIT("leader", priv->config_leader, bool, err);
- GF_OPTION_INIT("quorum-percent", priv->quorum_pct, percent, err);
-
- priv->leader = priv->config_leader;
- priv->child_up = _gf_false;
-
- if (gf_thread_create(&kid, NULL, jbr_flush_thread, this, "jbrflush") != 0) {
- gf_msg(this->name, GF_LOG_ERROR, 0, J_MSG_SYS_CALL_FAILURE,
- "could not start flush thread");
- /* TBD: treat this as a fatal error? */
- }
-
- /*
- * Calling glfs_new changes old->ctx, even if THIS still points
- * to global_xlator. That causes problems later in the main
- * thread, when gf_log_dump_graph tries to use the FILE after
- * we've mucked with it and gets a segfault in __fprintf_chk.
- * We can avoid all that by undoing the damage before we
- * continue.
- */
- global_xlator.ctx = oldctx;
-
- return 0;
-
-err:
- jbr_deallocate_priv(priv);
- return -1;
-}
-
-void
-jbr_fini(xlator_t *this)
-{
- jbr_deallocate_priv(this->private);
-}
-
-class_methods_t class_methods = {
- .init = jbr_init,
- .fini = jbr_fini,
- .reconfigure = jbr_reconfigure,
- .notify = jbr_notify,
-};
-
-struct volume_options options[] = {
- {.key = {"leader"},
- .type = GF_OPTION_TYPE_BOOL,
- .default_value = "false",
- .description = "Start in the leader role. This is only for "
- "bootstrapping the code, and should go away when we "
- "have real leader election."},
- {.key = {"vol-name"},
- .type = GF_OPTION_TYPE_STR,
- .description = "volume name"},
- {.key = {"my-name"},
- .type = GF_OPTION_TYPE_STR,
- .description = "brick name in form of host:/path"},
- {.key = {"etcd-servers"},
- .type = GF_OPTION_TYPE_STR,
- .description = "list of comma separated etc servers"},
- {.key = {"subvol-uuid"},
- .type = GF_OPTION_TYPE_STR,
- .description = "UUID for this JBR (sub)volume"},
- {.key = {"quorum-percent"},
- .type = GF_OPTION_TYPE_PERCENT,
- .default_value = "50.0",
- .description = "percentage of rep_count-1 that must be up"},
- {.key = {NULL}},
-};
diff --git a/xlators/experimental/posix2/Makefile.am b/xlators/experimental/posix2/Makefile.am
deleted file mode 100644
index 74e5ab0f5bc..00000000000
--- a/xlators/experimental/posix2/Makefile.am
+++ /dev/null
@@ -1,3 +0,0 @@
-SUBDIRS = common mds ds
-
-CLEANFILES =
diff --git a/xlators/experimental/posix2/README.md b/xlators/experimental/posix2/README.md
deleted file mode 100644
index 955a98d061e..00000000000
--- a/xlators/experimental/posix2/README.md
+++ /dev/null
@@ -1,7 +0,0 @@
-# POSIX2 Experimental README
-
-POSIX2 is an implementation of modified storage translator to cater to DHT2
-on disk needs.
-
-For further understanding, refer to xlators/experimental/dht2/README.md for
-details regarding POSIX2
diff --git a/xlators/experimental/posix2/TODO.md b/xlators/experimental/posix2/TODO.md
deleted file mode 100644
index 20cd1e89c1d..00000000000
--- a/xlators/experimental/posix2/TODO.md
+++ /dev/null
@@ -1,3 +0,0 @@
-# POSIX2 TODO List
-
-<Items will be added as code is pulled into the repository> \ No newline at end of file
diff --git a/xlators/experimental/posix2/common/Makefile.am b/xlators/experimental/posix2/common/Makefile.am
deleted file mode 100644
index a985f42a877..00000000000
--- a/xlators/experimental/posix2/common/Makefile.am
+++ /dev/null
@@ -1,3 +0,0 @@
-SUBDIRS = src
-
-CLEANFILES =
diff --git a/xlators/experimental/posix2/common/src/Makefile.am b/xlators/experimental/posix2/common/src/Makefile.am
deleted file mode 100644
index 07533d2bf37..00000000000
--- a/xlators/experimental/posix2/common/src/Makefile.am
+++ /dev/null
@@ -1,16 +0,0 @@
-lib_LTLIBRARIES = libposix2common.la
-
-posix2_common_sources = posix2-common.c
-
-libposix2common_la_SOURCES = $(posix2_common_sources)
-libposix2common_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
-
-libposix2common_la_CFLAGS = -Wall $(GF_CFLAGS)
-
-libposix2common_la_CPPFLAGS = $(GF_CPPFLAGS)
-libposix2common_la_CPPFLAGS += -I$(top_srcdir)/libglusterfs/src
-libposix2common_la_CPPFLAGS += -I$(top_srcdir)/rpc/xdr/src
-libposix2common_la_CPPFLAGS += -I$(top_builddir)/rpc/xdr/src
-
-
-CLEANFILES =
diff --git a/xlators/experimental/posix2/common/src/posix2-common.c b/xlators/experimental/posix2/common/src/posix2-common.c
deleted file mode 100644
index 14b51d538b2..00000000000
--- a/xlators/experimental/posix2/common/src/posix2-common.c
+++ /dev/null
@@ -1,18 +0,0 @@
-/*
- Copyright (c) 2016 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-/* File: posix2-common.c
- * This file contains common routines across ds and mds posix xlators
- * The entire functionality including comments is TODO.
- */
-
-#include "glusterfs.h"
-#include "logging.h"
-#include "statedump.h"
diff --git a/xlators/experimental/posix2/ds/Makefile.am b/xlators/experimental/posix2/ds/Makefile.am
deleted file mode 100644
index a985f42a877..00000000000
--- a/xlators/experimental/posix2/ds/Makefile.am
+++ /dev/null
@@ -1,3 +0,0 @@
-SUBDIRS = src
-
-CLEANFILES =
diff --git a/xlators/experimental/posix2/ds/src/Makefile.am b/xlators/experimental/posix2/ds/src/Makefile.am
deleted file mode 100644
index 7a792a8d07b..00000000000
--- a/xlators/experimental/posix2/ds/src/Makefile.am
+++ /dev/null
@@ -1,22 +0,0 @@
-if WITH_SERVER
-xlator_LTLIBRARIES = posix2-ds.la
-endif
-xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/experimental
-
-posix2_ds_sources = posix2-ds-main.c
-
-posix2_ds_la_SOURCES = $(posix2_ds_sources)
-posix2_ds_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS)
-posix2_ds_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
-posix2_ds_la_LIBADD += $(top_builddir)/xlators/experimental/posix2/common/src/libposix2common.la
-
-AM_CFLAGS = -Wall $(GF_CFLAGS)
-
-AM_CPPFLAGS = $(GF_CPPFLAGS)
-AM_CPPFLAGS += -I$(top_srcdir)/xlators/storage/posix2/common/src
-AM_CPPFLAGS += -I$(top_srcdir)/libglusterfs/src
-AM_CPPFLAGS += -I$(top_srcdir)/xlators/lib/src
-AM_CPPFLAGS += -I$(top_srcdir)/rpc/xdr/src
-AM_CPPFLAGS += -I$(top_builddir)/rpc/xdr/src
-
-CLEANFILES =
diff --git a/xlators/experimental/posix2/ds/src/posix2-ds-main.c b/xlators/experimental/posix2/ds/src/posix2-ds-main.c
deleted file mode 100644
index 4e399a98ed4..00000000000
--- a/xlators/experimental/posix2/ds/src/posix2-ds-main.c
+++ /dev/null
@@ -1,56 +0,0 @@
-/*
- Copyright (c) 2016 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-/* File: posix2-ds-main.c
- * This file contains the xlator loading functions, FOP entry points
- * and options.
- * The entire functionality including comments is TODO.
- */
-
-#include "glusterfs.h"
-#include "xlator.h"
-#include "logging.h"
-#include "statedump.h"
-
-int32_t
-posix2_ds_init(xlator_t *this)
-{
- if (this->children) {
- gf_log(this->name, GF_LOG_ERROR,
- "This (%s) is a leaf xlator, but found children", this->name);
- return -1;
- }
-
- return 0;
-}
-
-void
-posix2_ds_fini(xlator_t *this)
-{
- return;
-}
-
-class_methods_t class_methods = {
- .init = posix2_ds_init,
- .fini = posix2_ds_fini,
-};
-
-struct xlator_fops fops = {};
-
-struct xlator_cbks cbks = {};
-
-/*
-struct xlator_dumpops dumpops = {
-};
-*/
-
-struct volume_options options[] = {
- {.key = {NULL}},
-};
diff --git a/xlators/experimental/posix2/mds/Makefile.am b/xlators/experimental/posix2/mds/Makefile.am
deleted file mode 100644
index a985f42a877..00000000000
--- a/xlators/experimental/posix2/mds/Makefile.am
+++ /dev/null
@@ -1,3 +0,0 @@
-SUBDIRS = src
-
-CLEANFILES =
diff --git a/xlators/experimental/posix2/mds/src/Makefile.am b/xlators/experimental/posix2/mds/src/Makefile.am
deleted file mode 100644
index 0681cb73c45..00000000000
--- a/xlators/experimental/posix2/mds/src/Makefile.am
+++ /dev/null
@@ -1,22 +0,0 @@
-if WITH_SERVER
-xlator_LTLIBRARIES = posix2-mds.la
-endif
-xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/experimental
-
-posix2_mds_sources = posix2-mds-main.c
-
-posix2_mds_la_SOURCES = $(posix2_mds_sources)
-posix2_mds_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS)
-posix2_mds_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
-posix2_mds_la_LIBADD += $(top_builddir)/xlators/experimental/posix2/common/src/libposix2common.la
-
-AM_CFLAGS = -Wall $(GF_CFLAGS)
-
-AM_CPPFLAGS = $(GF_CPPFLAGS)
-AM_CPPFLAGS += -I$(top_srcdir)/xlators/storage/posix2/common/src
-AM_CPPFLAGS += -I$(top_srcdir)/libglusterfs/src
-AM_CPPFLAGS += -I$(top_srcdir)/xlators/lib/src
-AM_CPPFLAGS += -I$(top_srcdir)/rpc/xdr/src
-AM_CPPFLAGS += -I$(top_builddir)/rpc/xdr/src
-
-CLEANFILES =
diff --git a/xlators/experimental/posix2/mds/src/posix2-mds-main.c b/xlators/experimental/posix2/mds/src/posix2-mds-main.c
deleted file mode 100644
index 58da05f2091..00000000000
--- a/xlators/experimental/posix2/mds/src/posix2-mds-main.c
+++ /dev/null
@@ -1,56 +0,0 @@
-/*
- Copyright (c) 2016 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-/* File: posix2-mds-main.c
- * This file contains the xlator loading functions, FOP entry points
- * and options.
- * The entire functionality including comments is TODO.
- */
-
-#include "glusterfs.h"
-#include "xlator.h"
-#include "logging.h"
-#include "statedump.h"
-
-int32_t
-posix2_mds_init(xlator_t *this)
-{
- if (this->children) {
- gf_log(this->name, GF_LOG_ERROR,
- "This (%s) is a leaf xlator, but found children", this->name);
- return -1;
- }
-
- return 0;
-}
-
-void
-posix2_mds_fini(xlator_t *this)
-{
- return;
-}
-
-class_methods_t class_methods = {
- .init = posix2_mds_init,
- .fini = posix2_mds_fini,
-};
-
-struct xlator_fops fops = {};
-
-struct xlator_cbks cbks = {};
-
-/*
-struct xlator_dumpops dumpops = {
-};
-*/
-
-struct volume_options options[] = {
- {.key = {NULL}},
-};
diff --git a/xlators/features/Makefile.am b/xlators/features/Makefile.am
index 194634b003d..c57897f11ea 100644
--- a/xlators/features/Makefile.am
+++ b/xlators/features/Makefile.am
@@ -2,9 +2,13 @@ if BUILD_CLOUDSYNC
CLOUDSYNC_DIR = cloudsync
endif
+if BUILD_METADISP
+ METADISP_DIR = metadisp
+endif
+
SUBDIRS = locks quota read-only quiesce marker index barrier arbiter upcall \
compress changelog gfid-access snapview-client snapview-server trash \
shard bit-rot leases selinux sdfs namespace $(CLOUDSYNC_DIR) thin-arbiter \
- utime
+ utime $(METADISP_DIR)
CLEANFILES =
diff --git a/xlators/features/arbiter/src/arbiter-mem-types.h b/xlators/features/arbiter/src/arbiter-mem-types.h
index 0f77cfd05f4..05d18374c46 100644
--- a/xlators/features/arbiter/src/arbiter-mem-types.h
+++ b/xlators/features/arbiter/src/arbiter-mem-types.h
@@ -9,7 +9,7 @@
#ifndef __ARBITER_MEM_TYPES_H__
#define __ARBITER_MEM_TYPES_H__
-#include "mem-types.h"
+#include <glusterfs/mem-types.h>
typedef enum gf_arbiter_mem_types_ {
gf_arbiter_mt_inode_ctx_t = gf_common_mt_end + 1,
diff --git a/xlators/features/arbiter/src/arbiter.c b/xlators/features/arbiter/src/arbiter.c
index 4af68f9ba52..83a97e3354b 100644
--- a/xlators/features/arbiter/src/arbiter.c
+++ b/xlators/features/arbiter/src/arbiter.c
@@ -10,9 +10,9 @@
#include "arbiter.h"
#include "arbiter-mem-types.h"
-#include "glusterfs.h"
-#include "xlator.h"
-#include "logging.h"
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/xlator.h>
+#include <glusterfs/logging.h>
static arbiter_inode_ctx_t *
__arbiter_inode_ctx_get(inode_t *inode, xlator_t *this)
@@ -365,3 +365,16 @@ struct xlator_cbks cbks = {
struct volume_options options[] = {
{.key = {NULL}},
};
+
+xlator_api_t xlator_api = {
+ .init = init,
+ .fini = fini,
+ .reconfigure = reconfigure,
+ .mem_acct_init = mem_acct_init,
+ .op_version = {1}, /* Present from the initial version */
+ .fops = &fops,
+ .cbks = &cbks,
+ .options = options,
+ .identifier = "arbiter",
+ .category = GF_MAINTAINED,
+};
diff --git a/xlators/features/arbiter/src/arbiter.h b/xlators/features/arbiter/src/arbiter.h
index ce1c909f70f..546db7b751a 100644
--- a/xlators/features/arbiter/src/arbiter.h
+++ b/xlators/features/arbiter/src/arbiter.h
@@ -11,8 +11,8 @@
#ifndef _ARBITER_H
#define _ARBITER_H
-#include "locking.h"
-#include "common-utils.h"
+#include <glusterfs/locking.h>
+#include <glusterfs/common-utils.h>
typedef struct arbiter_inode_ctx_ {
struct iatt iattbuf;
diff --git a/xlators/features/barrier/src/barrier-mem-types.h b/xlators/features/barrier/src/barrier-mem-types.h
index 93ccab633ce..71ed7898d9c 100644
--- a/xlators/features/barrier/src/barrier-mem-types.h
+++ b/xlators/features/barrier/src/barrier-mem-types.h
@@ -11,7 +11,7 @@
#ifndef __BARRIER_MEM_TYPES_H__
#define __BARRIER_MEM_TYPES_H__
-#include "mem-types.h"
+#include <glusterfs/mem-types.h>
enum gf_barrier_mem_types_ {
gf_barrier_mt_priv_t = gf_common_mt_end + 1,
diff --git a/xlators/features/barrier/src/barrier.c b/xlators/features/barrier/src/barrier.c
index 1c5c5ffdc22..852bbacb99d 100644
--- a/xlators/features/barrier/src/barrier.c
+++ b/xlators/features/barrier/src/barrier.c
@@ -9,10 +9,10 @@
*/
#include "barrier.h"
-#include "defaults.h"
-#include "call-stub.h"
+#include <glusterfs/defaults.h>
+#include <glusterfs/call-stub.h>
-#include "statedump.h"
+#include <glusterfs/statedump.h>
void
barrier_local_set_gfid(call_frame_t *frame, uuid_t gfid, xlator_t *this)
@@ -461,16 +461,14 @@ out:
int
notify(xlator_t *this, int event, void *data, ...)
{
- barrier_priv_t *priv = NULL;
+ barrier_priv_t *priv = this->private;
dict_t *dict = NULL;
- gf_boolean_t past = _gf_false;
int ret = -1;
int barrier_enabled = _gf_false;
struct list_head queue = {
0,
};
- priv = this->private;
GF_ASSERT(priv);
INIT_LIST_HEAD(&queue);
@@ -488,35 +486,27 @@ notify(xlator_t *this, int event, void *data, ...)
LOCK(&priv->lock);
{
- past = priv->barrier_enabled;
-
- switch (past) {
- case _gf_false:
- if (barrier_enabled) {
- ret = __barrier_enable(this, priv);
- if (ret)
- goto unlock;
- } else {
- gf_log(this->name, GF_LOG_ERROR,
- "Already disabled.");
- goto unlock;
- }
- break;
-
- case _gf_true:
- if (!barrier_enabled) {
- __barrier_disable(this, &queue);
- } else {
- gf_log(this->name, GF_LOG_ERROR, "Already enabled");
- goto unlock;
- }
- break;
+ if (!priv->barrier_enabled) {
+ if (barrier_enabled) {
+ ret = __barrier_enable(this, priv);
+ } else {
+ UNLOCK(&priv->lock);
+ gf_log(this->name, GF_LOG_ERROR, "Already disabled.");
+ goto post_unlock;
+ }
+ } else {
+ if (!barrier_enabled) {
+ __barrier_disable(this, &queue);
+ ret = 0;
+ } else {
+ UNLOCK(&priv->lock);
+ gf_log(this->name, GF_LOG_ERROR, "Already enabled");
+ goto post_unlock;
+ }
}
- ret = 0;
}
- unlock:
UNLOCK(&priv->lock);
-
+ post_unlock:
if (!list_empty(&queue))
barrier_dequeue_all(this, &queue);
@@ -536,7 +526,6 @@ int
reconfigure(xlator_t *this, dict_t *options)
{
barrier_priv_t *priv = NULL;
- gf_boolean_t past = _gf_false;
int ret = -1;
gf_boolean_t barrier_enabled = _gf_false;
uint32_t timeout = {
@@ -556,23 +545,17 @@ reconfigure(xlator_t *this, dict_t *options)
LOCK(&priv->lock);
{
- past = priv->barrier_enabled;
-
- switch (past) {
- case _gf_false:
- if (barrier_enabled) {
- ret = __barrier_enable(this, priv);
- if (ret) {
- goto unlock;
- }
+ if (!priv->barrier_enabled) {
+ if (barrier_enabled) {
+ ret = __barrier_enable(this, priv);
+ if (ret) {
+ goto unlock;
}
- break;
-
- case _gf_true:
- if (!barrier_enabled) {
- __barrier_disable(this, &queue);
- }
- break;
+ }
+ } else {
+ if (!barrier_enabled) {
+ __barrier_disable(this, &queue);
+ }
}
priv->timeout.tv_sec = timeout;
ret = 0;
@@ -746,13 +729,13 @@ barrier_dump_priv(xlator_t *this)
gf_proc_dump_build_key(key, "xlator.features.barrier", "priv");
gf_proc_dump_add_section("%s", key);
+ gf_proc_dump_build_key(key, "barrier", "enabled");
LOCK(&priv->lock);
{
- gf_proc_dump_build_key(key, "barrier", "enabled");
gf_proc_dump_write(key, "%d", priv->barrier_enabled);
gf_proc_dump_build_key(key, "barrier", "timeout");
- gf_proc_dump_write(key, "%" PRId64, priv->timeout.tv_sec);
+ gf_proc_dump_write(key, "%ld", priv->timeout.tv_sec);
if (priv->barrier_enabled) {
gf_proc_dump_build_key(key, "barrier", "queue_size");
gf_proc_dump_write(key, "%d", priv->queue_size);
@@ -809,3 +792,18 @@ struct volume_options options[] = {
"blocked acknowledgements are sent to the application"},
{.key = {NULL}},
};
+
+xlator_api_t xlator_api = {
+ .init = init,
+ .fini = fini,
+ .notify = notify,
+ .reconfigure = reconfigure,
+ .mem_acct_init = mem_acct_init,
+ .op_version = {1}, /* Present from the initial version */
+ .dumpops = &dumpops,
+ .fops = &fops,
+ .cbks = &cbks,
+ .options = options,
+ .identifier = "barrier",
+ .category = GF_MAINTAINED,
+};
diff --git a/xlators/features/barrier/src/barrier.h b/xlators/features/barrier/src/barrier.h
index d11d71d151e..1337f311f7d 100644
--- a/xlators/features/barrier/src/barrier.h
+++ b/xlators/features/barrier/src/barrier.h
@@ -12,9 +12,9 @@
#define __BARRIER_H__
#include "barrier-mem-types.h"
-#include "xlator.h"
-#include "timer.h"
-#include "call-stub.h"
+#include <glusterfs/xlator.h>
+#include <glusterfs/timer.h>
+#include <glusterfs/call-stub.h>
#define BARRIER_FOP_CBK(fop_name, label, frame, this, params...) \
do { \
@@ -65,11 +65,12 @@
typedef struct {
gf_timer_t *timer;
- gf_boolean_t barrier_enabled;
gf_lock_t lock;
struct list_head queue;
struct timespec timeout;
uint32_t queue_size;
+ gf_boolean_t barrier_enabled;
+ char _pad[3]; /* manual padding */
} barrier_priv_t;
int
diff --git a/xlators/features/bit-rot/src/bitd/bit-rot-bitd-messages.h b/xlators/features/bit-rot/src/bitd/bit-rot-bitd-messages.h
index 6f59933a31d..5bc5103a27c 100644
--- a/xlators/features/bit-rot/src/bitd/bit-rot-bitd-messages.h
+++ b/xlators/features/bit-rot/src/bitd/bit-rot-bitd-messages.h
@@ -11,7 +11,7 @@
#ifndef _BITROT_BITD_MESSAGES_H_
#define _BITROT_BITD_MESSAGES_H_
-#include "glfs-message-id.h"
+#include <glusterfs/glfs-message-id.h>
/* To add new message IDs, append new identifiers at the end of the list.
*
@@ -47,6 +47,55 @@ GLFS_MSGID(BITROT_BITD, BRB_MSG_FD_CREATE_FAILED, BRB_MSG_READV_FAILED,
BRB_MSG_SCRUB_THREAD_CLEANUP, BRB_MSG_SCRUBBER_CLEANED,
BRB_MSG_GENERIC_SSM_INFO, BRB_MSG_ZERO_TIMEOUT_BUG,
BRB_MSG_BAD_OBJ_READDIR_FAIL, BRB_MSG_SSM_FAILED,
- BRB_MSG_SCRUB_WAIT_FAILED);
+ BRB_MSG_SCRUB_WAIT_FAILED, BRB_MSG_TRIGGER_SIGN_FAILED,
+ BRB_MSG_EVENT_UNHANDLED, BRB_MSG_COULD_NOT_SCHEDULE_SCRUB,
+ BRB_MSG_THREAD_CREATION_FAILED, BRB_MSG_MEM_POOL_ALLOC,
+ BRB_MSG_SAVING_HASH_FAILED);
+#define BRB_MSG_FD_CREATE_FAILED_STR "failed to create fd for the inode"
+#define BRB_MSG_READV_FAILED_STR "readv failed"
+#define BRB_MSG_BLOCK_READ_FAILED_STR "reading block failed"
+#define BRB_MSG_NO_MEMORY_STR "failed to allocate memory"
+#define BRB_MSG_CALC_CHECKSUM_FAILED_STR "calculating checksum failed"
+#define BRB_MSG_GET_SIGN_FAILED_STR "failed to get the signature"
+#define BRB_MSG_SET_SIGN_FAILED_STR "signing failed"
+#define BRB_MSG_OP_FAILED_STR "failed on object"
+#define BRB_MSG_TRIGGER_SIGN_FAILED_STR "Could not trigger signing"
+#define BRB_MSG_READ_AND_SIGN_FAILED_STR "reading and signing of object failed"
+#define BRB_MSG_SET_TIMER_FAILED_STR "Failed to allocate object expiry timer"
+#define BRB_MSG_GET_SUBVOL_FAILED_STR \
+ "failed to get the subvolume for the brick"
+#define BRB_MSG_PATH_FAILED_STR "path failed"
+#define BRB_MSG_SKIP_OBJECT_STR "Entry is marked corrupted. skipping"
+#define BRB_MSG_PARTIAL_VERSION_PRESENCE_STR \
+ "PArtial version xattr presence detected, ignoring"
+#define BRB_MSG_TRIGGER_SIGN_STR "Triggering signing"
+#define BRB_MSG_CRAWLING_START_STR \
+ "Crawling brick, scanning for unsigned objects"
+#define BRB_MSG_CRAWLING_FINISH_STR "Completed crawling brick"
+#define BRB_MSG_REGISTER_FAILED_STR "Register to changelog failed"
+#define BRB_MSG_SPAWN_FAILED_STR "failed to spawn"
+#define BRB_MSG_CONNECTED_TO_BRICK_STR "Connected to brick"
+#define BRB_MSG_LOOKUP_FAILED_STR "lookup on root failed"
+#define BRB_MSG_GET_INFO_FAILED_STR "failed to get stub info"
+#define BRB_MSG_SCRUB_THREAD_CLEANUP_STR "Error cleaning up scanner thread"
+#define BRB_MSG_SCRUBBER_CLEANED_STR "clened up scrubber for brick"
+#define BRB_MSG_SUBVOL_CONNECT_FAILED_STR \
+ "callback handler for subvolume failed"
+#define BRB_MSG_MEM_ACNT_FAILED_STR "Memory accounting init failed"
+#define BRB_MSG_EVENT_UNHANDLED_STR "Event unhandled for child"
+#define BRB_MSG_INVALID_SUBVOL_STR "Got event from invalid subvolume"
+#define BRB_MSG_RESCHEDULE_SCRUBBER_FAILED_STR \
+ "on demand scrub schedule failed. Scrubber is not in pending state."
+#define BRB_MSG_COULD_NOT_SCHEDULE_SCRUB_STR \
+ "Could not schedule ondemand scrubbing. Scrubbing will continue " \
+ "according to old frequency."
+#define BRB_MSG_THREAD_CREATION_FAILED_STR "thread creation failed"
+#define BRB_MSG_RATE_LIMIT_INFO_STR "Rate Limit Info"
+#define BRB_MSG_MEM_POOL_ALLOC_STR "failed to allocate mem-pool for timer"
+#define BRB_MSG_NO_CHILD_STR "FATAL: no children"
+#define BRB_MSG_TIMER_WHEEL_UNAVAILABLE_STR "global timer wheel unavailable"
+#define BRB_MSG_BITROT_LOADED_STR "bit-rot xlator loaded"
+#define BRB_MSG_SAVING_HASH_FAILED_STR \
+ "failed to allocate memory for saving hash of the object"
#endif /* !_BITROT_BITD_MESSAGES_H_ */
diff --git a/xlators/features/bit-rot/src/bitd/bit-rot-scrub-status.c b/xlators/features/bit-rot/src/bitd/bit-rot-scrub-status.c
index 34e20f9df11..5cef2ffa5e5 100644
--- a/xlators/features/bit-rot/src/bitd/bit-rot-scrub-status.c
+++ b/xlators/features/bit-rot/src/bitd/bit-rot-scrub-status.c
@@ -40,21 +40,21 @@ br_inc_scrubbed_file(br_scrub_stats_t *scrub_stat)
}
void
-br_update_scrub_start_time(br_scrub_stats_t *scrub_stat, struct timeval *tv)
+br_update_scrub_start_time(br_scrub_stats_t *scrub_stat, time_t time)
{
if (!scrub_stat)
return;
pthread_mutex_lock(&scrub_stat->lock);
{
- scrub_stat->scrub_start_tv.tv_sec = tv->tv_sec;
+ scrub_stat->scrub_start_time = time;
}
pthread_mutex_unlock(&scrub_stat->lock);
}
void
br_update_scrub_finish_time(br_scrub_stats_t *scrub_stat, char *timestr,
- struct timeval *tv)
+ time_t time)
{
int lst_size = 0;
@@ -67,10 +67,10 @@ br_update_scrub_finish_time(br_scrub_stats_t *scrub_stat, char *timestr,
pthread_mutex_lock(&scrub_stat->lock);
{
- scrub_stat->scrub_end_tv.tv_sec = tv->tv_sec;
+ scrub_stat->scrub_end_time = time;
- scrub_stat->scrub_duration = scrub_stat->scrub_end_tv.tv_sec -
- scrub_stat->scrub_start_tv.tv_sec;
+ scrub_stat->scrub_duration = scrub_stat->scrub_end_time -
+ scrub_stat->scrub_start_time;
snprintf(scrub_stat->last_scrub_time, lst_size, "%s", timestr);
}
diff --git a/xlators/features/bit-rot/src/bitd/bit-rot-scrub-status.h b/xlators/features/bit-rot/src/bitd/bit-rot-scrub-status.h
index 24128b90a66..f022aa831eb 100644
--- a/xlators/features/bit-rot/src/bitd/bit-rot-scrub-status.h
+++ b/xlators/features/bit-rot/src/bitd/bit-rot-scrub-status.h
@@ -15,20 +15,22 @@
#include <sys/time.h>
#include <pthread.h>
+#include <glusterfs/common-utils.h>
+
struct br_scrub_stats {
- uint64_t scrubbed_files; /* Total number of scrubbed file */
+ uint64_t scrubbed_files; /* Total number of scrubbed files. */
- uint64_t unsigned_files; /* Total number of unsigned file */
+ uint64_t unsigned_files; /* Total number of unsigned files. */
- uint64_t scrub_duration; /* Duration of last scrub */
+ uint64_t scrub_duration; /* Duration of last scrub. */
- char last_scrub_time[1024]; /*last scrub completion time */
+ char last_scrub_time[GF_TIMESTR_SIZE]; /* Last scrub completion time. */
- struct timeval scrub_start_tv; /* Scrubbing starting time*/
+ time_t scrub_start_time; /* Scrubbing starting time. */
- struct timeval scrub_end_tv; /* Scrubbing finishing time */
+ time_t scrub_end_time; /* Scrubbing finishing time. */
- int8_t scrub_running; /* Scrub running or not */
+ int8_t scrub_running; /* Whether scrub running or not. */
pthread_mutex_t lock;
};
@@ -40,9 +42,9 @@ br_inc_unsigned_file_count(br_scrub_stats_t *scrub_stat);
void
br_inc_scrubbed_file(br_scrub_stats_t *scrub_stat);
void
-br_update_scrub_start_time(br_scrub_stats_t *scrub_stat, struct timeval *tv);
+br_update_scrub_start_time(br_scrub_stats_t *scrub_stat, time_t time);
void
br_update_scrub_finish_time(br_scrub_stats_t *scrub_stat, char *timestr,
- struct timeval *tv);
+ time_t time);
#endif /* __BIT_ROT_SCRUB_STATUS_H__ */
diff --git a/xlators/features/bit-rot/src/bitd/bit-rot-scrub.c b/xlators/features/bit-rot/src/bitd/bit-rot-scrub.c
index b856c168eb7..289dd53f610 100644
--- a/xlators/features/bit-rot/src/bitd/bit-rot-scrub.c
+++ b/xlators/features/bit-rot/src/bitd/bit-rot-scrub.c
@@ -12,15 +12,15 @@
#include <ctype.h>
#include <sys/uio.h>
-#include "glusterfs.h"
-#include "logging.h"
-#include "common-utils.h"
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/logging.h>
+#include <glusterfs/common-utils.h>
#include "bit-rot-scrub.h"
#include <pthread.h>
#include "bit-rot-bitd-messages.h"
#include "bit-rot-scrub-status.h"
-#include "events.h"
+#include <glusterfs/events.h>
struct br_scrubbers {
pthread_t scrubthread;
@@ -130,6 +130,8 @@ bitd_scrub_post_compute_check(xlator_t *this, br_child_t *child, fd_t *fd,
(void)memcpy(*signature, signptr, sizeof(br_isignature_out_t) + signlen);
+ (*signature)->signaturelen = signlen;
+
unref_dict:
dict_unref(xattr);
out:
@@ -222,7 +224,7 @@ bitd_compare_ckum(xlator_t *this, br_isignature_out_t *sign, unsigned char *md,
GF_VALIDATE_OR_GOTO(this->name, md, out);
GF_VALIDATE_OR_GOTO(this->name, entry, out);
- if (strncmp(sign->signature, (char *)md, strlen(sign->signature)) == 0) {
+ if (strncmp(sign->signature, (char *)md, sign->signaturelen) == 0) {
gf_msg_debug(this->name, 0,
"%s [GFID: %s | Brick: %s] "
"matches calculated checksum",
@@ -599,25 +601,23 @@ br_fsscan_deactivate(xlator_t *this)
static void
br_scrubber_log_time(xlator_t *this, const char *sfx)
{
- char timestr[1024] = {
- 0,
- };
- struct timeval tv = {
+ char timestr[GF_TIMESTR_SIZE] = {
0,
};
br_private_t *priv = NULL;
+ time_t now = 0;
+ now = gf_time();
priv = this->private;
- gettimeofday(&tv, NULL);
- gf_time_fmt(timestr, sizeof(timestr), tv.tv_sec, gf_timefmt_FT);
+ gf_time_fmt(timestr, sizeof(timestr), now, gf_timefmt_FT);
if (strcasecmp(sfx, "started") == 0) {
- br_update_scrub_start_time(&priv->scrub_stat, &tv);
+ br_update_scrub_start_time(&priv->scrub_stat, now);
gf_msg(this->name, GF_LOG_INFO, 0, BRB_MSG_SCRUB_START,
"Scrubbing %s at %s", sfx, timestr);
} else {
- br_update_scrub_finish_time(&priv->scrub_stat, timestr, &tv);
+ br_update_scrub_finish_time(&priv->scrub_stat, timestr, now);
gf_msg(this->name, GF_LOG_INFO, 0, BRB_MSG_SCRUB_FINISH,
"Scrubbing %s at %s", sfx, timestr);
}
@@ -626,15 +626,13 @@ br_scrubber_log_time(xlator_t *this, const char *sfx)
static void
br_fsscanner_log_time(xlator_t *this, br_child_t *child, const char *sfx)
{
- char timestr[1024] = {
- 0,
- };
- struct timeval tv = {
+ char timestr[GF_TIMESTR_SIZE] = {
0,
};
+ time_t now = 0;
- gettimeofday(&tv, NULL);
- gf_time_fmt(timestr, sizeof(timestr), tv.tv_sec, gf_timefmt_FT);
+ now = gf_time();
+ gf_time_fmt(timestr, sizeof(timestr), now, gf_timefmt_FT);
if (strcasecmp(sfx, "started") == 0) {
gf_msg_debug(this->name, 0, "Scrubbing \"%s\" %s at %s",
@@ -718,8 +716,10 @@ br_scrubber_exit_control(xlator_t *this)
if (scrub_monitor->state == BR_SCRUB_STATE_ACTIVE) {
(void)br_fsscan_activate(this);
} else {
+ UNLOCK(&scrub_monitor->lock);
gf_msg(this->name, GF_LOG_INFO, 0, BRB_MSG_SCRUB_INFO,
"Volume waiting to get rescheduled..");
+ return;
}
}
UNLOCK(&scrub_monitor->lock);
@@ -915,10 +915,7 @@ br_fsscan_schedule(xlator_t *this)
{
uint32_t timo = 0;
br_private_t *priv = NULL;
- struct timeval tv = {
- 0,
- };
- char timestr[1024] = {
+ char timestr[GF_TIMESTR_SIZE] = {
0,
};
struct br_scrubber *fsscrub = NULL;
@@ -929,8 +926,7 @@ br_fsscan_schedule(xlator_t *this)
fsscrub = &priv->fsscrub;
scrub_monitor = &priv->scrub_monitor;
- (void)gettimeofday(&tv, NULL);
- scrub_monitor->boot = tv.tv_sec;
+ scrub_monitor->boot = gf_time();
timo = br_fsscan_calculate_timeout(fsscrub->frequency);
if (timo == 0) {
@@ -971,12 +967,10 @@ int32_t
br_fsscan_activate(xlator_t *this)
{
uint32_t timo = 0;
- char timestr[1024] = {
- 0,
- };
- struct timeval now = {
+ char timestr[GF_TIMESTR_SIZE] = {
0,
};
+ time_t now = 0;
br_private_t *priv = NULL;
struct br_scrubber *fsscrub = NULL;
struct br_monitor *scrub_monitor = NULL;
@@ -985,7 +979,7 @@ br_fsscan_activate(xlator_t *this)
fsscrub = &priv->fsscrub;
scrub_monitor = &priv->scrub_monitor;
- (void)gettimeofday(&now, NULL);
+ now = gf_time();
timo = br_fsscan_calculate_timeout(fsscrub->frequency);
if (timo == 0) {
gf_msg(this->name, GF_LOG_ERROR, 0, BRB_MSG_ZERO_TIMEOUT_BUG,
@@ -999,7 +993,7 @@ br_fsscan_activate(xlator_t *this)
}
pthread_mutex_unlock(&scrub_monitor->donelock);
- gf_time_fmt(timestr, sizeof(timestr), (now.tv_sec + timo), gf_timefmt_FT);
+ gf_time_fmt(timestr, sizeof(timestr), now + timo, gf_timefmt_FT);
(void)gf_tw_mod_timer(priv->timer_wheel, scrub_monitor->timer, timo);
_br_monitor_set_scrub_state(scrub_monitor, BR_SCRUB_STATE_PENDING);
@@ -1016,12 +1010,10 @@ br_fsscan_reschedule(xlator_t *this)
{
int32_t ret = 0;
uint32_t timo = 0;
- char timestr[1024] = {
- 0,
- };
- struct timeval now = {
+ char timestr[GF_TIMESTR_SIZE] = {
0,
};
+ time_t now = 0;
br_private_t *priv = NULL;
struct br_scrubber *fsscrub = NULL;
struct br_monitor *scrub_monitor = NULL;
@@ -1033,7 +1025,7 @@ br_fsscan_reschedule(xlator_t *this)
if (!fsscrub->frequency_reconf)
return 0;
- (void)gettimeofday(&now, NULL);
+ now = gf_time();
timo = br_fsscan_calculate_timeout(fsscrub->frequency);
if (timo == 0) {
gf_msg(this->name, GF_LOG_ERROR, 0, BRB_MSG_ZERO_TIMEOUT_BUG,
@@ -1041,7 +1033,7 @@ br_fsscan_reschedule(xlator_t *this)
return -1;
}
- gf_time_fmt(timestr, sizeof(timestr), (now.tv_sec + timo), gf_timefmt_FT);
+ gf_time_fmt(timestr, sizeof(timestr), now + timo, gf_timefmt_FT);
pthread_mutex_lock(&scrub_monitor->donelock);
{
@@ -1069,23 +1061,19 @@ br_fsscan_ondemand(xlator_t *this)
{
int32_t ret = 0;
uint32_t timo = 0;
- char timestr[1024] = {
- 0,
- };
- struct timeval now = {
+ char timestr[GF_TIMESTR_SIZE] = {
0,
};
+ time_t now = 0;
br_private_t *priv = NULL;
struct br_monitor *scrub_monitor = NULL;
priv = this->private;
scrub_monitor = &priv->scrub_monitor;
- (void)gettimeofday(&now, NULL);
-
+ now = gf_time();
timo = BR_SCRUB_ONDEMAND;
-
- gf_time_fmt(timestr, sizeof(timestr), (now.tv_sec + timo), gf_timefmt_FT);
+ gf_time_fmt(timestr, sizeof(timestr), now + timo, gf_timefmt_FT);
pthread_mutex_lock(&scrub_monitor->donelock);
{
@@ -1543,9 +1531,11 @@ br_scrubber_log_option(xlator_t *this, br_private_t *priv,
[BR_SCRUB_THROTTLE_LAZY] = "lazy",
[BR_SCRUB_THROTTLE_NORMAL] = "normal",
[BR_SCRUB_THROTTLE_AGGRESSIVE] = "aggressive",
+ [BR_SCRUB_THROTTLE_STALLED] = "stalled",
};
char *scrub_freq_str[] = {
+ [0] = "",
[BR_FSSCRUB_FREQ_HOURLY] = "hourly",
[BR_FSSCRUB_FREQ_DAILY] = "daily",
[BR_FSSCRUB_FREQ_WEEKLY] = "weekly",
@@ -1558,6 +1548,8 @@ br_scrubber_log_option(xlator_t *this, br_private_t *priv,
return; /* logged as pause */
if (fsscrub->frequency_reconf || fsscrub->throttle_reconf) {
+ if (fsscrub->throttle == BR_SCRUB_THROTTLE_VOID)
+ return;
gf_msg(this->name, GF_LOG_INFO, 0, BRB_MSG_SCRUB_TUNABLE,
"SCRUB TUNABLES:: [Frequency: %s, Throttle: %s]",
scrub_freq_str[fsscrub->frequency],
@@ -1649,7 +1641,7 @@ br_read_bad_object_dir(xlator_t *this, br_child_t *child, fd_t *fd,
int32_t ret = -1;
off_t offset = 0;
int32_t count = 0;
- char key[PATH_MAX] = {
+ char key[32] = {
0,
};
dict_t *out_dict = NULL;
@@ -1687,7 +1679,7 @@ br_read_bad_object_dir(xlator_t *this, br_child_t *child, fd_t *fd,
}
ret = count;
- ret = dict_set_int32(dict, "count", count);
+ ret = dict_set_int32_sizen(dict, "count", count);
out:
return ret;
@@ -1769,10 +1761,10 @@ br_collect_bad_objects_of_child(xlator_t *this, br_child_t *child, dict_t *dict,
{
int32_t ret = -1;
int32_t count = 0;
- char key[PATH_MAX] = {
+ char key[32] = {
0,
};
- char main_key[PATH_MAX] = {
+ char main_key[32] = {
0,
};
int32_t j = 0;
@@ -1784,15 +1776,15 @@ br_collect_bad_objects_of_child(xlator_t *this, br_child_t *child, dict_t *dict,
char *path = NULL;
int32_t len = 0;
- ret = dict_get_int32(child_dict, "count", &count);
+ ret = dict_get_int32_sizen(child_dict, "count", &count);
if (ret)
goto out;
tmp_count = total_count;
for (j = 0; j < count; j++) {
- snprintf(key, PATH_MAX, "quarantine-%d", j);
- ret = dict_get_str(child_dict, key, &entry);
+ len = snprintf(key, sizeof(key), "quarantine-%d", j);
+ ret = dict_get_strn(child_dict, key, len, &entry);
if (ret)
continue;
@@ -1802,7 +1794,7 @@ br_collect_bad_objects_of_child(xlator_t *this, br_child_t *child, dict_t *dict,
if ((len < 0) || (len >= PATH_MAX)) {
continue;
}
- snprintf(main_key, PATH_MAX, "quarantine-%d", tmp_count);
+ snprintf(main_key, sizeof(main_key), "quarantine-%d", tmp_count);
ret = dict_set_dynstr_with_alloc(dict, main_key, tmp);
if (!ret)
diff --git a/xlators/features/bit-rot/src/bitd/bit-rot-scrub.h b/xlators/features/bit-rot/src/bitd/bit-rot-scrub.h
index 7a3c14abb93..4e5f67bc021 100644
--- a/xlators/features/bit-rot/src/bitd/bit-rot-scrub.h
+++ b/xlators/features/bit-rot/src/bitd/bit-rot-scrub.h
@@ -11,7 +11,7 @@
#ifndef __BIT_ROT_SCRUB_H__
#define __BIT_ROT_SCRUB_H__
-#include "xlator.h"
+#include <glusterfs/xlator.h>
#include "bit-rot.h"
void *
diff --git a/xlators/features/bit-rot/src/bitd/bit-rot-ssm.h b/xlators/features/bit-rot/src/bitd/bit-rot-ssm.h
index f3fbe2928b7..37b45a42eac 100644
--- a/xlators/features/bit-rot/src/bitd/bit-rot-ssm.h
+++ b/xlators/features/bit-rot/src/bitd/bit-rot-ssm.h
@@ -11,7 +11,7 @@
#ifndef __BIT_ROT_SSM_H__
#define __BIT_ROT_SSM_H__
-#include "xlator.h"
+#include <glusterfs/xlator.h>
typedef enum br_scrub_state {
BR_SCRUB_STATE_INACTIVE = 0,
diff --git a/xlators/features/bit-rot/src/bitd/bit-rot.c b/xlators/features/bit-rot/src/bitd/bit-rot.c
index 8ea89257836..a2f1c343a1d 100644
--- a/xlators/features/bit-rot/src/bitd/bit-rot.c
+++ b/xlators/features/bit-rot/src/bitd/bit-rot.c
@@ -9,12 +9,9 @@
*/
#include <ctype.h>
-#include <sys/uio.h>
-#include "glusterfs.h"
-#include "xlator.h"
-#include "logging.h"
-#include "compat-errno.h"
+#include <glusterfs/logging.h>
+#include <glusterfs/compat-errno.h>
#include "bit-rot.h"
#include "bit-rot-scrub.h"
@@ -244,8 +241,8 @@ br_object_open(xlator_t *this, br_object_t *object, inode_t *inode,
ret = -EINVAL;
fd = fd_create(inode, 0);
if (!fd) {
- gf_msg(this->name, GF_LOG_ERROR, 0, BRB_MSG_FD_CREATE_FAILED,
- "failed to create fd for the inode %s", uuid_utoa(inode->gfid));
+ gf_smsg(this->name, GF_LOG_ERROR, 0, BRB_MSG_FD_CREATE_FAILED,
+ "gfid=%s", uuid_utoa(inode->gfid), NULL);
goto out;
}
@@ -299,8 +296,8 @@ br_object_read_block_and_sign(xlator_t *this, fd_t *fd, br_child_t *child,
NULL, NULL, NULL);
if (ret < 0) {
- gf_msg(this->name, GF_LOG_ERROR, errno, BRB_MSG_READV_FAILED,
- "readv on %s failed", uuid_utoa(fd->inode->gfid));
+ gf_smsg(this->name, GF_LOG_ERROR, errno, BRB_MSG_READV_FAILED,
+ "gfid=%s", uuid_utoa(fd->inode->gfid), NULL);
ret = -1;
goto out;
}
@@ -350,9 +347,9 @@ br_calculate_obj_checksum(unsigned char *md, br_child_t *child, fd_t *fd,
ret = br_object_read_block_and_sign(this, fd, child, offset, block,
&sha256);
if (ret < 0) {
- gf_msg(this->name, GF_LOG_ERROR, 0, BRB_MSG_BLOCK_READ_FAILED,
- "reading block with offset %" PRIu64 " of object %s failed",
- offset, uuid_utoa(fd->inode->gfid));
+ gf_smsg(this->name, GF_LOG_ERROR, 0, BRB_MSG_BLOCK_READ_FAILED,
+ "offset=%" PRIu64, offset, "object-gfid=%s",
+ uuid_utoa(fd->inode->gfid), NULL);
break;
}
@@ -394,28 +391,23 @@ br_object_read_sign(inode_t *linked_inode, fd_t *fd, br_object_t *object,
md = GF_MALLOC(SHA256_DIGEST_LENGTH, gf_common_mt_char);
if (!md) {
- gf_msg(this->name, GF_LOG_ERROR, ENOMEM, BRB_MSG_NO_MEMORY,
- "failed to allocate memory for saving hash of the "
- "object %s",
- uuid_utoa(fd->inode->gfid));
+ gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, BRB_MSG_SAVING_HASH_FAILED,
+ "object-gfid=%s", uuid_utoa(fd->inode->gfid), NULL);
goto out;
}
ret = br_object_checksum(md, object, fd, iatt);
if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, BRB_MSG_CALC_CHECKSUM_FAILED,
- "calculating checksum "
- "for the object %s failed",
- uuid_utoa(linked_inode->gfid));
+ gf_smsg(this->name, GF_LOG_ERROR, 0, BRB_MSG_CALC_CHECKSUM_FAILED,
+ "object-gfid=%s", uuid_utoa(linked_inode->gfid), NULL);
goto free_signature;
}
sign = br_prepare_signature(md, SHA256_DIGEST_LENGTH,
BR_SIGNATURE_TYPE_SHA256, object);
if (!sign) {
- gf_msg(this->name, GF_LOG_ERROR, 0, BRB_MSG_GET_SIGN_FAILED,
- "failed to get the signature for the object %s",
- uuid_utoa(fd->inode->gfid));
+ gf_smsg(this->name, GF_LOG_ERROR, 0, BRB_MSG_GET_SIGN_FAILED,
+ "object-gfid=%s", uuid_utoa(fd->inode->gfid), NULL);
goto free_signature;
}
@@ -423,17 +415,16 @@ br_object_read_sign(inode_t *linked_inode, fd_t *fd, br_object_t *object,
signature_size(SHA256_DIGEST_LENGTH), _gf_true);
if (!xattr) {
- gf_msg(this->name, GF_LOG_ERROR, 0, BRB_MSG_SET_SIGN_FAILED,
- "dict allocation for signing failed for the object %s",
- uuid_utoa(fd->inode->gfid));
+ gf_smsg(this->name, GF_LOG_ERROR, 0, BRB_MSG_SET_SIGN_FAILED,
+ "dict-allocation object-gfid=%s", uuid_utoa(fd->inode->gfid),
+ NULL);
goto free_isign;
}
ret = syncop_fsetxattr(object->child->xl, fd, xattr, 0, NULL, NULL);
if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, BRB_MSG_SET_SIGN_FAILED,
- "fsetxattr of signature to the object %s failed",
- uuid_utoa(fd->inode->gfid));
+ gf_smsg(this->name, GF_LOG_ERROR, 0, BRB_MSG_SET_SIGN_FAILED,
+ "fsetxattr object-gfid=%s", uuid_utoa(fd->inode->gfid), NULL);
goto unref_dict;
}
@@ -466,8 +457,8 @@ br_log_object(xlator_t *this, char *op, uuid_t gfid, int32_t op_errno)
"[reason: %s]",
op, uuid_utoa(gfid), strerror(op_errno));
} else {
- gf_msg(this->name, GF_LOG_ERROR, op_errno, BRB_MSG_OP_FAILED,
- "%s() failed on object %s", op, uuid_utoa(gfid));
+ gf_smsg(this->name, GF_LOG_ERROR, op_errno, BRB_MSG_OP_FAILED, "op=%s",
+ op, "gfid=%s", uuid_utoa(gfid), NULL);
}
}
@@ -481,8 +472,8 @@ br_log_object_path(xlator_t *this, char *op, const char *path, int32_t op_errno)
"[reason: %s]",
op, path, strerror(op_errno));
} else {
- gf_msg(this->name, GF_LOG_ERROR, op_errno, BRB_MSG_OP_FAILED,
- "%s() failed on object %s", op, path);
+ gf_smsg(this->name, GF_LOG_ERROR, op_errno, BRB_MSG_OP_FAILED, "op=%s",
+ op, "path=%s", path, NULL);
}
}
@@ -511,8 +502,8 @@ br_trigger_sign(xlator_t *this, br_child_t *child, inode_t *linked_inode,
ret = -1;
fd = fd_create(linked_inode, 0);
if (!fd) {
- gf_msg(this->name, GF_LOG_ERROR, 0, BRB_MSG_FD_CREATE_FAILED,
- "Failed to create fd [GFID %s]", uuid_utoa(linked_inode->gfid));
+ gf_smsg(this->name, GF_LOG_ERROR, 0, BRB_MSG_FD_CREATE_FAILED,
+ "gfid=%s", uuid_utoa(linked_inode->gfid), NULL);
goto cleanup_dict;
}
@@ -536,9 +527,9 @@ cleanup_dict:
dict_unref(dict);
out:
if (ret) {
- gf_msg(this->name, GF_LOG_WARNING, 0, BRB_MSG_TRIGGER_SIGN,
- "Could not trigger signingd for %s (reopen hint: %d)",
- uuid_utoa(linked_inode->gfid), val);
+ gf_smsg(this->name, GF_LOG_WARNING, 0, BRB_MSG_TRIGGER_SIGN_FAILED,
+ "gfid=%s", uuid_utoa(linked_inode->gfid), "reopen-hint-val=%d",
+ val, NULL);
}
}
@@ -618,10 +609,8 @@ br_sign_object(br_object_t *object)
ret = br_object_read_sign(linked_inode, fd, object, &iatt);
if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, BRB_MSG_READ_AND_SIGN_FAILED,
- "reading and signing of "
- "the object %s failed",
- uuid_utoa(linked_inode->gfid));
+ gf_smsg(this->name, GF_LOG_ERROR, 0, BRB_MSG_READ_AND_SIGN_FAILED,
+ "gfid=%s", uuid_utoa(linked_inode->gfid), NULL);
goto unref_fd;
}
@@ -675,8 +664,8 @@ br_process_object(void *arg)
ret = br_sign_object(object);
if (ret && !br_object_sign_softerror(-ret))
- gf_msg(this->name, GF_LOG_ERROR, 0, BRB_MSG_SIGN_FAILED,
- "SIGNING FAILURE [%s]", uuid_utoa(object->gfid));
+ gf_smsg(this->name, GF_LOG_ERROR, 0, BRB_MSG_SET_SIGN_FAILED,
+ "gfid=%s", uuid_utoa(object->gfid), NULL);
GF_FREE(object);
}
@@ -778,9 +767,8 @@ br_schedule_object_reopen(xlator_t *this, br_object_t *object,
timer = br_initialize_timer(this, object, child, ev);
if (!timer)
- gf_msg(this->name, GF_LOG_ERROR, 0, BRB_MSG_SET_TIMER_FAILED,
- "Failed to allocate object expiry timer [GFID: %s]",
- uuid_utoa(object->gfid));
+ gf_smsg(this->name, GF_LOG_ERROR, 0, BRB_MSG_SET_TIMER_FAILED,
+ "gfid=%s", uuid_utoa(object->gfid), NULL);
return timer ? 0 : -1;
}
@@ -827,15 +815,15 @@ br_brick_callback(void *xl, char *brick, void *data, changelog_event_t *ev)
child = br_get_child_from_brick_path(this, brick);
if (!child) {
- gf_msg(this->name, GF_LOG_ERROR, 0, BRB_MSG_GET_SUBVOL_FAILED,
- "failed to get the subvolume for the brick %s", brick);
+ gf_smsg(this->name, GF_LOG_ERROR, 0, BRB_MSG_GET_SUBVOL_FAILED,
+ "brick=%s", brick, NULL);
goto out;
}
object = br_initialize_object(this, child, ev);
if (!object) {
- gf_msg(this->name, GF_LOG_ERROR, ENOMEM, BRB_MSG_NO_MEMORY,
- "failed to allocate object memory [GFID: %s]", uuid_utoa(gfid));
+ gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, BRB_MSG_NO_MEMORY,
+ "object-gfid=%s", uuid_utoa(gfid), NULL);
goto out;
}
@@ -887,8 +875,8 @@ br_check_object_need_sign(xlator_t *this, dict_t *xattr, br_child_t *child)
ret = dict_get_ptr(xattr, GLUSTERFS_GET_OBJECT_SIGNATURE, (void **)&sign);
if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, BRB_MSG_GET_SIGN_FAILED,
- "failed to get object signature info");
+ gf_smsg(this->name, GF_LOG_ERROR, 0, BRB_MSG_GET_SIGN_FAILED,
+ "object-info", NULL);
goto out;
}
@@ -927,9 +915,9 @@ br_prepare_loc(xlator_t *this, br_child_t *child, loc_t *parent,
ret = inode_path(parent->inode, entry->d_name, (char **)&loc->path);
if (ret < 0 || !loc->path) {
- gf_msg(this->name, GF_LOG_ERROR, 0, BRB_MSG_PATH_FAILED,
- "inode_path on %s (parent: %s) failed", entry->d_name,
- uuid_utoa(parent->inode->gfid));
+ gf_smsg(this->name, GF_LOG_ERROR, 0, BRB_MSG_PATH_FAILED,
+ "inode_path=%s", entry->d_name, "parent-gfid=%s",
+ uuid_utoa(parent->inode->gfid), NULL);
goto out;
}
@@ -973,6 +961,7 @@ bitd_oneshot_crawl(xlator_t *subvol, gf_dirent_t *entry, loc_t *parent,
int32_t ret = -1;
inode_t *linked_inode = NULL;
gf_boolean_t need_signing = _gf_false;
+ gf_boolean_t need_reopen = _gf_true;
GF_VALIDATE_OR_GOTO("bit-rot", subvol, out);
GF_VALIDATE_OR_GOTO("bit-rot", data, out);
@@ -1020,8 +1009,8 @@ bitd_oneshot_crawl(xlator_t *subvol, gf_dirent_t *entry, loc_t *parent,
*/
if (bitd_is_bad_file(this, child, &loc, NULL)) {
- gf_msg(this->name, GF_LOG_WARNING, 0, BRB_MSG_SKIP_OBJECT,
- "Entry [%s] is marked corrupted.. skipping.", loc.path);
+ gf_smsg(this->name, GF_LOG_WARNING, 0, BRB_MSG_SKIP_OBJECT, "path=%s",
+ loc.path, NULL);
goto unref_inode;
}
@@ -1038,23 +1027,32 @@ bitd_oneshot_crawl(xlator_t *subvol, gf_dirent_t *entry, loc_t *parent,
if (op_errno == ENODATA && (iatt.ia_size != 0))
need_signing = _gf_true;
if (op_errno == EINVAL)
- gf_msg(this->name, GF_LOG_WARNING, 0,
- BRB_MSG_PARTIAL_VERSION_PRESENCE,
- "Partial "
- "version xattr presence detected, ignoring "
- "[GFID: %s]",
- uuid_utoa(linked_inode->gfid));
+ gf_smsg(this->name, GF_LOG_WARNING, 0,
+ BRB_MSG_PARTIAL_VERSION_PRESENCE, "gfid=%s",
+ uuid_utoa(linked_inode->gfid), NULL);
} else {
need_signing = br_check_object_need_sign(this, xattr, child);
+
+ /*
+ * If we are here means, bitrot daemon has started. Is it just
+ * a simple restart of the daemon or is it started because the
+ * feature is enabled is something hard to determine. Hence,
+ * if need_signing is false (because bit-rot version and signature
+ * are present), then still go ahead and sign it.
+ */
+ if (!need_signing) {
+ need_signing = _gf_true;
+ need_reopen = _gf_true;
+ }
}
if (!need_signing)
goto unref_dict;
- gf_msg(this->name, GF_LOG_INFO, 0, BRB_MSG_TRIGGER_SIGN,
- "Triggering signing for %s [GFID: %s | Brick: %s]", loc.path,
- uuid_utoa(linked_inode->gfid), child->brick_path);
- br_trigger_sign(this, child, linked_inode, &loc, _gf_true);
+ gf_smsg(this->name, GF_LOG_INFO, 0, BRB_MSG_TRIGGER_SIGN, "path=%s",
+ loc.path, "gfid=%s", uuid_utoa(linked_inode->gfid), "Brick-path=%s",
+ child->brick_path, NULL);
+ br_trigger_sign(this, child, linked_inode, &loc, need_reopen);
ret = 0;
@@ -1086,17 +1084,16 @@ br_oneshot_signer(void *arg)
THIS = this;
- gf_msg(this->name, GF_LOG_INFO, 0, BRB_MSG_CRAWLING_START,
- "Crawling brick [%s], scanning for unsigned objects",
- child->brick_path);
+ gf_smsg(this->name, GF_LOG_INFO, 0, BRB_MSG_CRAWLING_START, "brick-path=%s",
+ child->brick_path, NULL);
loc.inode = child->table->root;
(void)syncop_ftw_throttle(child->xl, &loc, GF_CLIENT_PID_BITD, child,
bitd_oneshot_crawl, BR_CRAWL_THROTTLE_COUNT,
BR_CRAWL_THROTTLE_ZZZ);
- gf_msg(this->name, GF_LOG_INFO, 0, BRB_MSG_CRAWLING_FINISH,
- "Completed crawling brick [%s]", child->brick_path);
+ gf_smsg(this->name, GF_LOG_INFO, 0, BRB_MSG_CRAWLING_FINISH,
+ "brick-path=%s", child->brick_path, NULL);
return NULL;
}
@@ -1140,9 +1137,7 @@ br_enact_signer(xlator_t *this, br_child_t *child, br_stub_init_t *stub)
ret = gf_changelog_register_generic(brick, 1, 1,
this->ctx->cmd_args.log_file, -1, this);
if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, errno, BRB_MSG_REGISTER_FAILED,
- "Register to changelog "
- "failed");
+ gf_smsg(this->name, GF_LOG_ERROR, errno, BRB_MSG_REGISTER_FAILED, NULL);
goto dealloc;
}
@@ -1150,8 +1145,8 @@ br_enact_signer(xlator_t *this, br_child_t *child, br_stub_init_t *stub)
ret = gf_thread_create(&child->thread, NULL, br_oneshot_signer, child,
"brosign");
if (ret)
- gf_msg(this->name, GF_LOG_WARNING, 0, BRB_MSG_SPAWN_FAILED,
- "failed to spawn FS crawler thread");
+ gf_smsg(this->name, GF_LOG_WARNING, 0, BRB_MSG_SPAWN_FAILED,
+ "FS-crawler-thread", NULL);
else
child->threadrunning = 1;
@@ -1179,9 +1174,9 @@ br_launch_scrubber(xlator_t *this, br_child_t *child, struct br_scanfs *fsscan,
ret = gf_thread_create(&child->thread, NULL, br_fsscanner, child,
"brfsscan");
if (ret != 0) {
- gf_msg(this->name, GF_LOG_ALERT, 0, BRB_MSG_SPAWN_FAILED,
- "failed to spawn bitrot scrubber daemon [Brick: %s]",
- child->brick_path);
+ gf_smsg(this->name, GF_LOG_ALERT, 0, BRB_MSG_SPAWN_FAILED,
+ "bitrot-scrubber-daemon Brick-path=%s", child->brick_path,
+ NULL);
goto error_return;
}
@@ -1269,8 +1264,8 @@ br_child_enaction(xlator_t *this, br_child_t *child, br_stub_init_t *stub)
if (!ret) {
child->witnessed = 1;
_br_set_child_state(child, BR_CHILD_STATE_CONNECTED);
- gf_msg(this->name, GF_LOG_INFO, 0, BRB_MSG_CONNECTED_TO_BRICK,
- "Connected to brick %s..", child->brick_path);
+ gf_smsg(this->name, GF_LOG_INFO, 0, BRB_MSG_CONNECTED_TO_BRICK,
+ "brick-path=%s", child->brick_path, NULL);
}
}
pthread_mutex_unlock(&child->lock);
@@ -1317,8 +1312,8 @@ br_brick_connect(xlator_t *this, br_child_t *child)
if (ret) {
op_errno = -ret;
ret = -1;
- gf_msg(this->name, GF_LOG_ERROR, op_errno, BRB_MSG_LOOKUP_FAILED,
- "lookup on root failed");
+ gf_smsg(this->name, GF_LOG_ERROR, op_errno, BRB_MSG_LOOKUP_FAILED,
+ NULL);
goto wipeloc;
}
@@ -1327,15 +1322,14 @@ br_brick_connect(xlator_t *this, br_child_t *child)
if (ret) {
op_errno = -ret;
ret = -1;
- gf_msg(this->name, GF_LOG_ERROR, op_errno, BRB_MSG_GET_INFO_FAILED,
- "failed to get stub info");
+ gf_smsg(this->name, GF_LOG_ERROR, op_errno, BRB_MSG_GET_INFO_FAILED,
+ NULL);
goto wipeloc;
}
ret = dict_get_ptr(xattr, GLUSTERFS_GET_BR_STUB_INIT_TIME, (void **)&stub);
if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, BRB_MSG_GET_INFO_FAILED,
- "failed to extract stub information");
+ gf_smsg(this->name, GF_LOG_ERROR, 0, BRB_MSG_GET_INFO_FAILED, NULL);
goto free_dict;
}
@@ -1405,11 +1399,10 @@ br_cleanup_scrubber(xlator_t *this, br_child_t *child)
*/
ret = gf_thread_cleanup_xint(child->thread);
if (ret)
- gf_msg(this->name, GF_LOG_INFO, 0, BRB_MSG_SCRUB_THREAD_CLEANUP,
- "Error cleaning up scanner thread");
+ gf_smsg(this->name, GF_LOG_INFO, 0, BRB_MSG_SCRUB_THREAD_CLEANUP, NULL);
- gf_msg(this->name, GF_LOG_INFO, 0, BRB_MSG_SCRUBBER_CLEANED,
- "Cleaned up scrubber for brick [%s]", child->brick_path);
+ gf_smsg(this->name, GF_LOG_INFO, 0, BRB_MSG_SCRUBBER_CLEANED,
+ "brick-path=%s", child->brick_path, NULL);
return 0;
}
@@ -1494,9 +1487,8 @@ br_handle_events(void *arg)
child = childev->child;
ret = childev->call(this, child);
if (ret)
- gf_msg(this->name, GF_LOG_ERROR, 0, BRB_MSG_SUBVOL_CONNECT_FAILED,
- "callback handler for subvolume [%s] failed",
- child->xl->name);
+ gf_smsg(this->name, GF_LOG_ERROR, 0, BRB_MSG_SUBVOL_CONNECT_FAILED,
+ "name=%s", child->xl->name, NULL);
GF_FREE(childev);
}
@@ -1514,8 +1506,7 @@ mem_acct_init(xlator_t *this)
ret = xlator_mem_acct_init(this, gf_br_stub_mt_end + 1);
if (ret != 0) {
- gf_msg(this->name, GF_LOG_WARNING, 0, BRB_MSG_MEM_ACNT_FAILED,
- "Memory accounting init failed");
+ gf_smsg(this->name, GF_LOG_WARNING, 0, BRB_MSG_MEM_ACNT_FAILED, NULL);
return ret;
}
@@ -1532,8 +1523,8 @@ _br_qchild_event(xlator_t *this, br_child_t *child, br_child_handler *call)
childev = GF_CALLOC(1, sizeof(*childev), gf_br_mt_br_child_event_t);
if (!childev) {
- gf_msg(this->name, GF_LOG_ERROR, ENOMEM, BRB_MSG_NO_MEMORY,
- "Event unhandled for child.. [Brick: %s]", child->xl->name);
+ gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, BRB_MSG_EVENT_UNHANDLED,
+ "Brick-name=%s", child->xl->name, NULL);
return;
}
@@ -1628,10 +1619,8 @@ notify(xlator_t *this, int32_t event, void *data, ...)
switch (event) {
case GF_EVENT_CHILD_UP:
if (idx < 0) {
- gf_msg(this->name, GF_LOG_ERROR, 0, BRB_MSG_INVALID_SUBVOL,
- "Got event %d from "
- "invalid subvolume",
- event);
+ gf_smsg(this->name, GF_LOG_ERROR, 0, BRB_MSG_INVALID_SUBVOL,
+ "event=%d", event, NULL);
goto out;
}
@@ -1659,9 +1648,8 @@ notify(xlator_t *this, int32_t event, void *data, ...)
case GF_EVENT_CHILD_DOWN:
if (idx < 0) {
- gf_msg(this->name, GF_LOG_ERROR, 0,
- BRB_MSG_INVALID_SUBVOL_CHILD,
- "Got event %d from invalid subvolume", event);
+ gf_smsg(this->name, GF_LOG_ERROR, 0, BRB_MSG_INVALID_SUBVOL,
+ "event=%d", event, NULL);
goto out;
}
@@ -1702,11 +1690,9 @@ notify(xlator_t *this, int32_t event, void *data, ...)
"called");
if (scrub_monitor->state != BR_SCRUB_STATE_PENDING) {
- gf_msg(this->name, GF_LOG_ERROR, 0,
- BRB_MSG_RESCHEDULE_SCRUBBER_FAILED,
- "on demand scrub schedule failed. Scrubber is "
- "not in pending state. Current state is %d",
- scrub_monitor->state);
+ gf_smsg(this->name, GF_LOG_ERROR, 0,
+ BRB_MSG_RESCHEDULE_SCRUBBER_FAILED, "Current-state=%d",
+ scrub_monitor->state, NULL);
return -2;
}
@@ -1718,11 +1704,8 @@ notify(xlator_t *this, int32_t event, void *data, ...)
pthread_mutex_unlock(&priv->lock);
if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0,
- BRB_MSG_RESCHEDULE_SCRUBBER_FAILED,
- "Could not schedule ondemand scrubbing. "
- "Scrubbing will continue according to "
- "old frequency.");
+ gf_smsg(this->name, GF_LOG_ERROR, 0,
+ BRB_MSG_COULD_NOT_SCHEDULE_SCRUB, NULL);
}
gf_msg_debug(this->name, 0, "returning %d", ret);
break;
@@ -1734,22 +1717,26 @@ out:
return 0;
}
-/**
- * Initialize signer specific structures, spawn worker threads.
- */
-
static void
br_fini_signer(xlator_t *this, br_private_t *priv)
{
int i = 0;
- for (; i < BR_WORKERS; i++) {
+ if (priv == NULL)
+ return;
+
+ for (; i < priv->signer_th_count; i++) {
(void)gf_thread_cleanup_xint(priv->obj_queue->workers[i]);
}
+ GF_FREE(priv->obj_queue->workers);
pthread_cond_destroy(&priv->object_cond);
}
+/**
+ * Initialize signer specific structures, spawn worker threads.
+ */
+
static int32_t
br_init_signer(xlator_t *this, br_private_t *priv)
{
@@ -1769,13 +1756,17 @@ br_init_signer(xlator_t *this, br_private_t *priv)
goto cleanup_cond;
INIT_LIST_HEAD(&priv->obj_queue->objects);
- for (i = 0; i < BR_WORKERS; i++) {
+ priv->obj_queue->workers = GF_CALLOC(
+ priv->signer_th_count, sizeof(pthread_t), gf_br_mt_br_worker_t);
+ if (!priv->obj_queue->workers)
+ goto cleanup_obj_queue;
+
+ for (i = 0; i < priv->signer_th_count; i++) {
ret = gf_thread_create(&priv->obj_queue->workers[i], NULL,
br_process_object, this, "brpobj");
if (ret != 0) {
- gf_msg(this->name, GF_LOG_ERROR, -ret, BRB_MSG_SPAWN_FAILED,
- "thread creation"
- " failed");
+ gf_smsg(this->name, GF_LOG_ERROR, -ret,
+ BRB_MSG_THREAD_CREATION_FAILED, NULL);
ret = -1;
goto cleanup_threads;
}
@@ -1787,7 +1778,9 @@ cleanup_threads:
for (i--; i >= 0; i--) {
(void)gf_thread_cleanup_xint(priv->obj_queue->workers[i]);
}
+ GF_FREE(priv->obj_queue->workers);
+cleanup_obj_queue:
GF_FREE(priv->obj_queue);
cleanup_cond:
@@ -1840,18 +1833,17 @@ br_rate_limit_signer(xlator_t *this, int child_count, int numbricks)
if (contribution == 0)
contribution = 1;
spec.rate = BR_HASH_CALC_READ_SIZE * contribution;
- spec.maxlimit = BR_WORKERS * BR_HASH_CALC_READ_SIZE;
+ spec.maxlimit = priv->signer_th_count * BR_HASH_CALC_READ_SIZE;
#endif
if (!spec.rate)
- gf_msg(this->name, GF_LOG_INFO, 0, BRB_MSG_RATE_LIMIT_INFO,
- "[Rate Limit Info] \"FULL THROTTLE\"");
+ gf_smsg(this->name, GF_LOG_INFO, 0, BRB_MSG_RATE_LIMIT_INFO,
+ "FULL THROTTLE", NULL);
else
- gf_msg(this->name, GF_LOG_INFO, 0, BRB_MSG_RATE_LIMIT_INFO,
- "[Rate Limit Info] \"tokens/sec (rate): %lu, "
- "maxlimit: %lu\"",
- spec.rate, spec.maxlimit);
+ gf_smsg(this->name, GF_LOG_INFO, 0, BRB_MSG_RATE_LIMIT_INFO,
+ "tokens/sec-rate=%lu", spec.rate, "maxlimit=%lu", spec.maxlimit,
+ NULL);
priv->tbf = tbf_init(&spec, 1);
return priv->tbf ? 0 : -1;
@@ -1860,11 +1852,16 @@ br_rate_limit_signer(xlator_t *this, int child_count, int numbricks)
static int32_t
br_signer_handle_options(xlator_t *this, br_private_t *priv, dict_t *options)
{
- if (options)
+ if (options) {
GF_OPTION_RECONF("expiry-time", priv->expiry_time, options, uint32,
error_return);
- else
+ GF_OPTION_RECONF("signer-threads", priv->signer_th_count, options,
+ uint32, error_return);
+ } else {
GF_OPTION_INIT("expiry-time", priv->expiry_time, uint32, error_return);
+ GF_OPTION_INIT("signer-threads", priv->signer_th_count, uint32,
+ error_return);
+ }
return 0;
@@ -1880,6 +1877,8 @@ br_signer_init(xlator_t *this, br_private_t *priv)
GF_OPTION_INIT("expiry-time", priv->expiry_time, uint32, error_return);
GF_OPTION_INIT("brick-count", numbricks, int32, error_return);
+ GF_OPTION_INIT("signer-threads", priv->signer_th_count, uint32,
+ error_return);
ret = br_rate_limit_signer(this, priv->child_count, numbricks);
if (ret)
@@ -1966,8 +1965,8 @@ br_init_children(xlator_t *this, br_private_t *priv)
child->timer_pool = mem_pool_new(struct gf_tw_timer_list, 4096);
if (!child->timer_pool) {
- gf_msg(this->name, GF_LOG_ERROR, ENOMEM, BRB_MSG_NO_MEMORY,
- "failed to allocate mem-pool for timer");
+ gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, BRB_MSG_MEM_POOL_ALLOC,
+ NULL);
errno = ENOMEM;
goto freechild;
}
@@ -1993,15 +1992,13 @@ init(xlator_t *this)
br_private_t *priv = NULL;
if (!this->children) {
- gf_msg(this->name, GF_LOG_ERROR, 0, BRB_MSG_NO_CHILD,
- "FATAL: no children");
+ gf_smsg(this->name, GF_LOG_ERROR, 0, BRB_MSG_NO_CHILD, NULL);
goto out;
}
priv = GF_CALLOC(1, sizeof(*priv), gf_br_mt_br_private_t);
if (!priv) {
- gf_msg(this->name, GF_LOG_ERROR, ENOMEM, BRB_MSG_NO_MEMORY,
- "failed to allocate memory (->priv)");
+ gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, BRB_MSG_NO_MEMORY, NULL);
goto out;
}
@@ -2019,8 +2016,8 @@ init(xlator_t *this)
priv->timer_wheel = glusterfs_ctx_tw_get(this->ctx);
if (!priv->timer_wheel) {
- gf_msg(this->name, GF_LOG_ERROR, 0, BRB_MSG_TIMER_WHEEL_UNAVAILABLE,
- "global timer wheel unavailable");
+ gf_smsg(this->name, GF_LOG_ERROR, 0, BRB_MSG_TIMER_WHEEL_UNAVAILABLE,
+ NULL);
goto cleanup;
}
@@ -2042,15 +2039,14 @@ init(xlator_t *this)
ret = gf_thread_create(&priv->thread, NULL, br_handle_events, this,
"brhevent");
if (ret != 0) {
- gf_msg(this->name, GF_LOG_ERROR, -ret, BRB_MSG_SPAWN_FAILED,
- "thread creation failed");
+ gf_smsg(this->name, GF_LOG_ERROR, -ret, BRB_MSG_THREAD_CREATION_FAILED,
+ NULL);
ret = -1;
}
if (!ret) {
- gf_msg(this->name, GF_LOG_INFO, 0, BRB_MSG_BITROT_LOADED,
- "bit-rot xlator loaded in \"%s\" mode",
- (priv->iamscrubber) ? "SCRUBBER" : "SIGNER");
+ gf_smsg(this->name, GF_LOG_INFO, 0, BRB_MSG_BITROT_LOADED, "mode=%s",
+ (priv->iamscrubber) ? "SCRUBBER" : "SIGNER", NULL);
return 0;
}
@@ -2097,9 +2093,8 @@ br_reconfigure_monitor(xlator_t *this)
ret = br_scrub_state_machine(this, _gf_false);
if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, BRB_MSG_RESCHEDULE_SCRUBBER_FAILED,
- "Could not reschedule scrubber for the volume. Scrubbing "
- "will continue according to old frequency.");
+ gf_smsg(this->name, GF_LOG_ERROR, 0, BRB_MSG_COULD_NOT_SCHEDULE_SCRUB,
+ NULL);
}
}
@@ -2210,5 +2205,28 @@ struct volume_options options[] = {
.description = "Pause/Resume scrub. Upon resume, scrubber "
"continues from where it left off.",
},
+ {
+ .key = {"signer-threads"},
+ .type = GF_OPTION_TYPE_INT,
+ .default_value = BR_WORKERS,
+ .op_version = {GD_OP_VERSION_8_0},
+ .flags = OPT_FLAG_SETTABLE,
+ .description = "Number of signing process threads. As a best "
+ "practice, set this to the number of processor cores",
+ },
{.key = {NULL}},
};
+
+xlator_api_t xlator_api = {
+ .init = init,
+ .fini = fini,
+ .notify = notify,
+ .reconfigure = reconfigure,
+ .mem_acct_init = mem_acct_init,
+ .op_version = {1}, /* Present from the initial version */
+ .fops = &fops,
+ .cbks = &cbks,
+ .options = options,
+ .identifier = "bit-rot",
+ .category = GF_MAINTAINED,
+};
diff --git a/xlators/features/bit-rot/src/bitd/bit-rot.h b/xlators/features/bit-rot/src/bitd/bit-rot.h
index 962b4d717e6..8ac7dcdac3d 100644
--- a/xlators/features/bit-rot/src/bitd/bit-rot.h
+++ b/xlators/features/bit-rot/src/bitd/bit-rot.h
@@ -11,17 +11,17 @@
#ifndef __BIT_ROT_H__
#define __BIT_ROT_H__
-#include "glusterfs.h"
-#include "logging.h"
-#include "dict.h"
-#include "xlator.h"
-#include "defaults.h"
-#include "syncop.h"
-#include "syncop-utils.h"
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/logging.h>
+#include <glusterfs/dict.h>
+#include <glusterfs/xlator.h>
+#include <glusterfs/defaults.h>
+#include <glusterfs/syncop.h>
+#include <glusterfs/syncop-utils.h>
#include "changelog.h"
#include "timer-wheel.h"
-#include "throttle-tbf.h"
+#include <glusterfs/throttle-tbf.h>
#include "bit-rot-ssm.h"
#include "bit-rot-common.h"
@@ -30,12 +30,6 @@
#include <openssl/sha.h>
-/**
- * TODO: make this configurable. As a best practice, set this to the
- * number of processor cores.
- */
-#define BR_WORKERS 4
-
typedef enum scrub_throttle {
BR_SCRUB_THROTTLE_VOID = -1,
BR_SCRUB_THROTTLE_LAZY = 0,
@@ -108,12 +102,12 @@ struct br_child {
typedef struct br_child br_child_t;
struct br_obj_n_workers {
- struct list_head objects; /* queue of objects expired from the
- timer wheel and ready to be picked
- up for signing */
- pthread_t workers[BR_WORKERS]; /* Threads which pick up the objects
- from the above queue and start
- signing each object */
+ struct list_head objects; /* queue of objects expired from the
+ timer wheel and ready to be picked
+ up for signing */
+ pthread_t *workers; /* Threads which pick up the objects
+ from the above queue and start
+ signing each object */
};
struct br_scrubber {
@@ -209,6 +203,8 @@ struct br_private {
uint32_t expiry_time; /* objects "wait" time */
+ uint32_t signer_th_count; /* Number of signing process threads */
+
tbf_t *tbf; /* token bucket filter */
gf_boolean_t iamscrubber; /* function as a fs scrubber */
diff --git a/xlators/features/bit-rot/src/stub/bit-rot-common.h b/xlators/features/bit-rot/src/stub/bit-rot-common.h
index ef683ac7f9f..20561aa7764 100644
--- a/xlators/features/bit-rot/src/stub/bit-rot-common.h
+++ b/xlators/features/bit-rot/src/stub/bit-rot-common.h
@@ -11,7 +11,7 @@
#ifndef __BIT_ROT_COMMON_H__
#define __BIT_ROT_COMMON_H__
-#include "glusterfs.h"
+#include <glusterfs/glusterfs.h>
#include "bit-rot-object-version.h"
#define BR_VXATTR_VERSION (1 << 0)
diff --git a/xlators/features/bit-rot/src/stub/bit-rot-stub-helpers.c b/xlators/features/bit-rot/src/stub/bit-rot-stub-helpers.c
index cb567297b60..8ac13a09941 100644
--- a/xlators/features/bit-rot/src/stub/bit-rot-stub-helpers.c
+++ b/xlators/features/bit-rot/src/stub/bit-rot-stub-helpers.c
@@ -133,8 +133,8 @@ br_stub_add(xlator_t *this, uuid_t gfid)
* show up less number of objects. That's fine as we'll have
* the log files that will have the missing information.
*/
- gf_msg(this->name, GF_LOG_WARNING, errno, BRS_MSG_LINK_FAIL,
- "failed to record gfid [%s]", uuid_utoa(gfid));
+ gf_smsg(this->name, GF_LOG_WARNING, errno, BRS_MSG_LINK_FAIL, "gfid=%s",
+ uuid_utoa(gfid), NULL);
}
return 0;
@@ -157,10 +157,8 @@ br_stub_del(xlator_t *this, uuid_t gfid)
uuid_utoa(gfid));
ret = sys_unlink(gfid_path);
if (ret && (errno != ENOENT)) {
- gf_msg(this->name, GF_LOG_ERROR, errno, BRS_MSG_BAD_OBJ_UNLINK_FAIL,
- "%s: failed to delete bad object link from quarantine "
- "directory",
- gfid_path);
+ gf_smsg(this->name, GF_LOG_ERROR, errno, BRS_MSG_BAD_OBJ_UNLINK_FAIL,
+ "path=%s", gfid_path, NULL);
ret = -errno;
goto out;
}
@@ -200,13 +198,13 @@ br_stub_check_stub_directory(xlator_t *this, char *fullpath)
}
if (ret)
- gf_msg(this->name, GF_LOG_ERROR, errno, BRS_MSG_BAD_OBJECT_DIR_FAIL,
- "failed to create stub directory [%s]", fullpath);
+ gf_smsg(this->name, GF_LOG_ERROR, errno, BRS_MSG_BAD_OBJECT_DIR_FAIL,
+ "create-path=%s", fullpath, NULL);
return ret;
error_return:
- gf_msg(this->name, GF_LOG_ERROR, errno, BRS_MSG_BAD_OBJECT_DIR_FAIL,
- "Failed to verify stub directory [%s]", fullpath);
+ gf_smsg(this->name, GF_LOG_ERROR, errno, BRS_MSG_BAD_OBJECT_DIR_FAIL,
+ "verify-path=%s", fullpath, NULL);
return -1;
}
@@ -231,8 +229,8 @@ br_stub_check_stub_file(xlator_t *this, char *path)
goto error_return;
fd = sys_creat(path, 0);
if (fd < 0)
- gf_msg(this->name, GF_LOG_ERROR, errno, BRS_MSG_BAD_OBJECT_DIR_FAIL,
- "Failed to create stub file [%s]", path);
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ BRS_MSG_BAD_OBJECT_DIR_FAIL, "create-path=%s", path, NULL);
}
if (fd >= 0) {
@@ -243,8 +241,8 @@ br_stub_check_stub_file(xlator_t *this, char *path)
return ret;
error_return:
- gf_msg(this->name, GF_LOG_ERROR, errno, BRS_MSG_BAD_OBJECT_DIR_FAIL,
- "Failed to verify stub file [%s]", path);
+ gf_smsg(this->name, GF_LOG_ERROR, errno, BRS_MSG_BAD_OBJECT_DIR_FAIL,
+ "verify-path=%s", path, NULL);
return -1;
}
@@ -463,12 +461,9 @@ br_stub_fill_readdir(fd_t *fd, br_stub_fd_t *fctx, DIR *dir, off_t off,
seekdir(dir, off);
#ifndef GF_LINUX_HOST_OS
if ((u_long)telldir(dir) != off && off != fctx->bad_object.dir_eof) {
- gf_msg(THIS->name, GF_LOG_ERROR, 0,
- BRS_MSG_BAD_OBJECT_DIR_SEEK_FAIL,
- "seekdir(0x%llx) failed on dir=%p: "
- "Invalid argument (offset reused from "
- "another DIR * structure?)",
- off, dir);
+ gf_smsg(THIS->name, GF_LOG_ERROR, 0,
+ BRS_MSG_BAD_OBJECT_DIR_SEEK_FAIL, "off=(0x%llx)", off,
+ "dir=%p", dir, NULL);
errno = EINVAL;
count = -1;
goto out;
@@ -480,9 +475,9 @@ br_stub_fill_readdir(fd_t *fd, br_stub_fd_t *fctx, DIR *dir, off_t off,
in_case = (u_long)telldir(dir);
if (in_case == -1) {
- gf_msg(THIS->name, GF_LOG_ERROR, 0,
- BRS_MSG_BAD_OBJECT_DIR_TELL_FAIL,
- "telldir failed on dir=%p: %s", dir, strerror(errno));
+ gf_smsg(THIS->name, GF_LOG_ERROR, 0,
+ BRS_MSG_BAD_OBJECT_DIR_TELL_FAIL, "dir=%p", dir, "err=%s",
+ strerror(errno), NULL);
goto out;
}
@@ -490,9 +485,9 @@ br_stub_fill_readdir(fd_t *fd, br_stub_fd_t *fctx, DIR *dir, off_t off,
entry = sys_readdir(dir, scratch);
if (!entry || errno != 0) {
if (errno == EBADF) {
- gf_msg(THIS->name, GF_LOG_WARNING, 0,
- BRS_MSG_BAD_OBJECT_DIR_READ_FAIL,
- "readdir failed on dir=%p: %s", dir, strerror(errno));
+ gf_smsg(THIS->name, GF_LOG_WARNING, 0,
+ BRS_MSG_BAD_OBJECT_DIR_READ_FAIL, "dir=%p", dir,
+ "err=%s", strerror(errno), NULL);
goto out;
}
break;
@@ -514,12 +509,9 @@ br_stub_fill_readdir(fd_t *fd, br_stub_fd_t *fctx, DIR *dir, off_t off,
#ifndef GF_LINUX_HOST_OS
if ((u_long)telldir(dir) != in_case &&
in_case != fctx->bad_object.dir_eof) {
- gf_msg(THIS->name, GF_LOG_ERROR, 0,
- BRS_MSG_BAD_OBJECT_DIR_SEEK_FAIL,
- "seekdir(0x%llx) failed on dir=%p: "
- "Invalid argument (offset reused from "
- "another DIR * structure?)",
- in_case, dir);
+ gf_smsg(THIS->name, GF_LOG_ERROR, 0,
+ BRS_MSG_BAD_OBJECT_DIR_SEEK_FAIL, "in_case=(0x%llx)",
+ in_case, "dir=%p", dir, NULL);
errno = EINVAL;
count = -1;
goto out;
@@ -531,9 +523,9 @@ br_stub_fill_readdir(fd_t *fd, br_stub_fd_t *fctx, DIR *dir, off_t off,
this_entry = gf_dirent_for_name(entry->d_name);
if (!this_entry) {
- gf_msg(THIS->name, GF_LOG_ERROR, 0, BRS_MSG_NO_MEMORY,
- "could not create gf_dirent for entry %s: (%s)",
- entry->d_name, strerror(errno));
+ gf_smsg(THIS->name, GF_LOG_ERROR, 0,
+ BRS_MSG_CREATE_GF_DIRENT_FAILED, "entry-name=%s",
+ entry->d_name, "err=%s", strerror(errno), NULL);
goto out;
}
/*
@@ -580,8 +572,8 @@ br_stub_readdir_wrapper(call_frame_t *frame, xlator_t *this, fd_t *fd,
fctx = br_stub_fd_ctx_get(this, fd);
if (!fctx) {
- gf_msg(this->name, GF_LOG_WARNING, 0, BRS_MSG_GET_FD_CONTEXT_FAILED,
- "pfd is NULL, fd=%p", fd);
+ gf_smsg(this->name, GF_LOG_WARNING, 0, BRS_MSG_GET_FD_CONTEXT_FAILED,
+ "fd=%p", fd, NULL);
op_errno = -ret;
goto done;
}
@@ -589,8 +581,8 @@ br_stub_readdir_wrapper(call_frame_t *frame, xlator_t *this, fd_t *fd,
dir = fctx->bad_object.dir;
if (!dir) {
- gf_msg(this->name, GF_LOG_WARNING, 0, BRS_MSG_BAD_HANDLE_DIR_NULL,
- "dir is NULL for fd=%p", fd);
+ gf_smsg(this->name, GF_LOG_WARNING, 0, BRS_MSG_BAD_HANDLE_DIR_NULL,
+ "fd=%p", fd, NULL);
op_errno = EINVAL;
goto done;
}
@@ -680,10 +672,7 @@ br_stub_bad_objects_path(xlator_t *this, fd_t *fd, gf_dirent_t *entries,
* be shown.
*/
if (!tmp_dict) {
- gf_msg(this->name, GF_LOG_ERROR, 0, BRS_MSG_NO_MEMORY,
- "failed to allocate new dict for saving the paths "
- "of the corrupted objects. Scrub status will only "
- "display the gfid");
+ gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_ALLOC_FAILED, NULL);
goto out;
}
}
@@ -707,9 +696,8 @@ br_stub_bad_objects_path(xlator_t *this, fd_t *fd, gf_dirent_t *entries,
uuid_utoa(gfid), hpath);
br_stub_entry_xattr_fill(this, hpath, entry, tmp_dict);
} else
- gf_msg(this->name, GF_LOG_WARNING, 0, BRS_MSG_PATH_GET_FAILED,
- "failed to get the path for the inode %s",
- uuid_utoa_r(gfid, str_gfid));
+ gf_smsg(this->name, GF_LOG_WARNING, 0, BRS_MSG_PATH_GET_FAILED,
+ "gfid=%s", uuid_utoa_r(gfid, str_gfid), NULL);
inode = NULL;
hpath = NULL;
@@ -744,10 +732,8 @@ br_stub_get_path_of_gfid(xlator_t *this, inode_t *parent, inode_t *inode,
ret = syncop_gfid_to_path_hard(parent->table, FIRST_CHILD(this), gfid,
inode, path, _gf_true);
if (ret < 0)
- gf_msg(this->name, GF_LOG_WARNING, 0, BRS_MSG_PATH_GET_FAILED,
- "failed to get the path xattr from disk for the "
- " gfid %s. Trying to get path from the memory",
- uuid_utoa_r(gfid, gfid_str));
+ gf_smsg(this->name, GF_LOG_WARNING, 0, BRS_MSG_PATH_GET_FAILED,
+ "gfid=%s", uuid_utoa_r(gfid, gfid_str), NULL);
/*
* Try with soft resolution of path if hard resolve fails. Because
@@ -768,9 +754,8 @@ br_stub_get_path_of_gfid(xlator_t *this, inode_t *parent, inode_t *inode,
ret = syncop_gfid_to_path_hard(parent->table, FIRST_CHILD(this), gfid,
inode, path, _gf_false);
if (ret < 0)
- gf_msg(this->name, GF_LOG_WARNING, 0, BRS_MSG_PATH_GET_FAILED,
- "failed to get the path from the memory for gfid %s",
- uuid_utoa_r(gfid, gfid_str));
+ gf_smsg(this->name, GF_LOG_WARNING, 0, BRS_MSG_PATH_GET_FAILED,
+ "from-memory gfid=%s", uuid_utoa_r(gfid, gfid_str), NULL);
}
out:
@@ -804,10 +789,8 @@ br_stub_entry_xattr_fill(xlator_t *this, char *hpath, gf_dirent_t *entry,
ret = dict_set_dynstr(dict, entry->d_name, hpath);
if (ret)
- gf_msg(this->name, GF_LOG_WARNING, 0, BRS_MSG_DICT_SET_FAILED,
- "failed to set the actual path %s as the value in the "
- "dict for the corrupted object %s",
- hpath, entry->d_name);
+ gf_smsg(this->name, GF_LOG_WARNING, 0, BRS_MSG_DICT_SET_FAILED,
+ "path=%s", hpath, "object-name=%s", entry->d_name, NULL);
out:
return;
}
diff --git a/xlators/features/bit-rot/src/stub/bit-rot-stub-mem-types.h b/xlators/features/bit-rot/src/stub/bit-rot-stub-mem-types.h
index a3e7b03291e..9d93caf069f 100644
--- a/xlators/features/bit-rot/src/stub/bit-rot-stub-mem-types.h
+++ b/xlators/features/bit-rot/src/stub/bit-rot-stub-mem-types.h
@@ -11,7 +11,7 @@
#ifndef _BR_MEM_TYPES_H
#define _BR_MEM_TYPES_H
-#include "mem-types.h"
+#include <glusterfs/mem-types.h>
enum br_mem_types {
gf_br_stub_mt_private_t = gf_common_mt_end + 1,
@@ -29,6 +29,7 @@ enum br_mem_types {
gf_br_stub_mt_sigstub_t,
gf_br_mt_br_child_event_t,
gf_br_stub_mt_misc,
+ gf_br_mt_br_worker_t,
gf_br_stub_mt_end,
};
diff --git a/xlators/features/bit-rot/src/stub/bit-rot-stub-messages.h b/xlators/features/bit-rot/src/stub/bit-rot-stub-messages.h
index 0e22f74f9cf..6c15a166f18 100644
--- a/xlators/features/bit-rot/src/stub/bit-rot-stub-messages.h
+++ b/xlators/features/bit-rot/src/stub/bit-rot-stub-messages.h
@@ -11,7 +11,7 @@
#ifndef _BITROT_STUB_MESSAGES_H_
#define _BITROT_STUB_MESSAGES_H_
-#include "glfs-message-id.h"
+#include <glusterfs/glfs-message-id.h>
/* To add new message IDs, append new identifiers at the end of the list.
*
@@ -39,6 +39,79 @@ GLFS_MSGID(BITROT_STUB, BRS_MSG_NO_MEMORY, BRS_MSG_SET_EVENT_FAILED,
BRS_MSG_BAD_HANDLE_DIR_NULL, BRS_MSG_BAD_OBJ_THREAD_FAIL,
BRS_MSG_BAD_OBJ_DIR_CLOSE_FAIL, BRS_MSG_LINK_FAIL,
BRS_MSG_BAD_OBJ_UNLINK_FAIL, BRS_MSG_DICT_SET_FAILED,
- BRS_MSG_PATH_GET_FAILED, BRS_MSG_NULL_LOCAL);
+ BRS_MSG_PATH_GET_FAILED, BRS_MSG_NULL_LOCAL,
+ BRS_MSG_SPAWN_SIGN_THRD_FAILED, BRS_MSG_KILL_SIGN_THREAD,
+ BRS_MSG_NON_BITD_PID, BRS_MSG_SIGN_PREPARE_FAIL,
+ BRS_MSG_USING_DEFAULT_THREAD_SIZE, BRS_MSG_ALLOC_MEM_FAILED,
+ BRS_MSG_DICT_ALLOC_FAILED, BRS_MSG_CREATE_GF_DIRENT_FAILED,
+ BRS_MSG_ALLOC_FAILED, BRS_MSG_PATH_XATTR_GET_FAILED,
+ BRS_MSG_VERSION_PREPARE_FAIL);
+#define BRS_MSG_MEM_ACNT_FAILED_STR "Memory accounting init failed"
+#define BRS_MSG_BAD_OBJ_THREAD_FAIL_STR "pthread_init failed"
+#define BRS_MSG_USING_DEFAULT_THREAD_SIZE_STR "Using default thread stack size"
+#define BRS_MSG_NO_CHILD_STR "FATAL: no children"
+#define BRS_MSG_SPAWN_SIGN_THRD_FAILED_STR \
+ "failed to create the new thread for signer"
+#define BRS_MSG_BAD_CONTAINER_FAIL_STR \
+ "failed to launch the thread for storing bad gfids"
+#define BRS_MSG_CANCEL_SIGN_THREAD_FAILED_STR \
+ "Could not cancel sign serializer thread"
+#define BRS_MSG_KILL_SIGN_THREAD_STR "killed the signer thread"
+#define BRS_MSG_GET_INODE_CONTEXT_FAILED_STR \
+ "failed to init the inode context for the inode"
+#define BRS_MSG_ADD_FD_TO_INODE_STR "failed to add fd to the inode"
+#define BRS_MSG_NO_MEMORY_STR "local allocation failed"
+#define BRS_MSG_BAD_OBJECT_ACCESS_STR "bad object accessed. Returning"
+#define BRS_MSG_SIGN_VERSION_ERROR_STR "Signing version exceeds current version"
+#define BRS_MSG_NON_BITD_PID_STR \
+ "PID from where signature request came, does not belong to bit-rot " \
+ "daemon. Unwinding the fop"
+#define BRS_MSG_SIGN_PREPARE_FAIL_STR \
+ "failed to prepare the signature. Unwinding the fop"
+#define BRS_MSG_VERSION_PREPARE_FAIL_STR \
+ "failed to prepare the version. Unwinding the fop"
+#define BRS_MSG_STUB_ALLOC_FAILED_STR "failed to allocate stub fop, Unwinding"
+#define BRS_MSG_BAD_OBJ_MARK_FAIL_STR "failed to mark object as bad"
+#define BRS_MSG_NON_SCRUB_BAD_OBJ_MARK_STR \
+ "bad object marking is not from the scrubber"
+#define BRS_MSG_ALLOC_MEM_FAILED_STR "failed to allocate memory"
+#define BRS_MSG_SET_INTERNAL_XATTR_STR "called on the internal xattr"
+#define BRS_MSG_REMOVE_INTERNAL_XATTR_STR "removexattr called on internal xattr"
+#define BRS_MSG_CREATE_ANONYMOUS_FD_FAILED_STR \
+ "failed to create anonymous fd for the inode"
+#define BRS_MSG_ADD_FD_TO_LIST_FAILED_STR "failed add fd to the list"
+#define BRS_MSG_SET_FD_CONTEXT_FAILED_STR \
+ "failed to set the fd context for the file"
+#define BRS_MSG_NULL_LOCAL_STR "local is NULL"
+#define BRS_MSG_DICT_ALLOC_FAILED_STR \
+ "dict allocation failed: cannot send IPC FOP to changelog"
+#define BRS_MSG_SET_EVENT_FAILED_STR "cannot set release event in dict"
+#define BRS_MSG_CREATE_FRAME_FAILED_STR "create_frame() failure"
+#define BRS_MSG_BAD_OBJ_DIR_CLOSE_FAIL_STR "closedir error"
+#define BRS_MSG_LINK_FAIL_STR "failed to record gfid"
+#define BRS_MSG_BAD_OBJ_UNLINK_FAIL_STR \
+ "failed to delete bad object link from quarantine directory"
+#define BRS_MSG_BAD_OBJECT_DIR_FAIL_STR "failed stub directory"
+#define BRS_MSG_BAD_OBJECT_DIR_SEEK_FAIL_STR \
+ "seekdir failed. Invalid argument (offset reused from another DIR * " \
+ "structure)"
+#define BRS_MSG_BAD_OBJECT_DIR_TELL_FAIL_STR "telldir failed on dir"
+#define BRS_MSG_BAD_OBJECT_DIR_READ_FAIL_STR "readdir failed on dir"
+#define BRS_MSG_CREATE_GF_DIRENT_FAILED_STR "could not create gf_dirent"
+#define BRS_MSG_GET_FD_CONTEXT_FAILED_STR "pfd is NULL"
+#define BRS_MSG_BAD_HANDLE_DIR_NULL_STR "dir is NULL"
+#define BRS_MSG_ALLOC_FAILED_STR \
+ "failed to allocate new dict for saving the paths of the corrupted " \
+ "objects. Scrub status will only display the gfid"
+#define BRS_MSG_PATH_GET_FAILED_STR "failed to get the path"
+#define BRS_MSG_PATH_XATTR_GET_FAILED_STR \
+ "failed to get the path xattr from disk for the gfid. Trying to get path " \
+ "from the memory"
+#define BRS_MSG_DICT_SET_FAILED_STR \
+ "failed to set the actual path as the value in the dict for the " \
+ "corrupted object"
+#define BRS_MSG_SET_CONTEXT_FAILED_STR \
+ "could not set fd context for release callback"
+#define BRS_MSG_CHANGE_VERSION_FAILED_STR "change version failed"
#endif /* !_BITROT_STUB_MESSAGES_H_ */
diff --git a/xlators/features/bit-rot/src/stub/bit-rot-stub.c b/xlators/features/bit-rot/src/stub/bit-rot-stub.c
index 0fc2b651b2d..447dd47ff41 100644
--- a/xlators/features/bit-rot/src/stub/bit-rot-stub.c
+++ b/xlators/features/bit-rot/src/stub/bit-rot-stub.c
@@ -12,12 +12,11 @@
#include <sys/uio.h>
#include <signal.h>
-#include "glusterfs.h"
-#include "xlator.h"
-#include "logging.h"
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/logging.h>
#include "changelog.h"
-#include "compat-errno.h"
-#include "call-stub.h"
+#include <glusterfs/compat-errno.h>
+#include <glusterfs/call-stub.h>
#include "bit-rot-stub.h"
#include "bit-rot-stub-mem-types.h"
@@ -26,6 +25,15 @@
#define BR_STUB_REQUEST_COOKIE 0x1
+void
+br_stub_lock_cleaner(void *arg)
+{
+ pthread_mutex_t *clean_mutex = arg;
+
+ pthread_mutex_unlock(clean_mutex);
+ return;
+}
+
void *
br_stub_signth(void *);
@@ -48,8 +56,7 @@ mem_acct_init(xlator_t *this)
ret = xlator_mem_acct_init(this, gf_br_stub_mt_end + 1);
if (ret != 0) {
- gf_msg(this->name, GF_LOG_WARNING, 0, BRS_MSG_MEM_ACNT_FAILED,
- "Memory accounting init failed");
+ gf_smsg(this->name, GF_LOG_WARNING, 0, BRS_MSG_MEM_ACNT_FAILED, NULL);
return ret;
}
@@ -64,29 +71,29 @@ br_stub_bad_object_container_init(xlator_t *this, br_stub_private_t *priv)
ret = pthread_cond_init(&priv->container.bad_cond, NULL);
if (ret != 0) {
- gf_msg(this->name, GF_LOG_ERROR, 0, BRS_MSG_BAD_OBJ_THREAD_FAIL,
- "pthread_cond_init failed (%d)", ret);
+ gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_BAD_OBJ_THREAD_FAIL,
+ "cond_init ret=%d", ret, NULL);
goto out;
}
ret = pthread_mutex_init(&priv->container.bad_lock, NULL);
if (ret != 0) {
- gf_msg(this->name, GF_LOG_ERROR, 0, BRS_MSG_BAD_OBJ_THREAD_FAIL,
- "pthread_mutex_init failed (%d)", ret);
+ gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_BAD_OBJ_THREAD_FAIL,
+ "mutex_init ret=%d", ret, NULL);
goto cleanup_cond;
}
ret = pthread_attr_init(&w_attr);
if (ret != 0) {
- gf_msg(this->name, GF_LOG_ERROR, 0, BRS_MSG_BAD_OBJ_THREAD_FAIL,
- "pthread_attr_init failed (%d)", ret);
+ gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_BAD_OBJ_THREAD_FAIL,
+ "attr_init ret=%d", ret, NULL);
goto cleanup_lock;
}
ret = pthread_attr_setstacksize(&w_attr, BAD_OBJECT_THREAD_STACK_SIZE);
if (ret == EINVAL) {
- gf_msg(this->name, GF_LOG_WARNING, 0, BRS_MSG_BAD_OBJ_THREAD_FAIL,
- "Using default thread stack size");
+ gf_smsg(this->name, GF_LOG_WARNING, 0,
+ BRS_MSG_USING_DEFAULT_THREAD_SIZE, NULL);
}
INIT_LIST_HEAD(&priv->container.bad_queue);
@@ -122,8 +129,7 @@ init(xlator_t *this)
br_stub_private_t *priv = NULL;
if (!this->children) {
- gf_msg(this->name, GF_LOG_ERROR, 0, BRS_MSG_NO_CHILD,
- "FATAL: no children");
+ gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_NO_CHILD, NULL);
goto error_return;
}
@@ -161,16 +167,20 @@ init(xlator_t *this)
* assigned inside the thread. So setting this->private here.
*/
this->private = priv;
+ if (!priv->do_versioning)
+ return 0;
ret = gf_thread_create(&priv->signth, NULL, br_stub_signth, this,
"brssign");
- if (ret != 0)
+ if (ret != 0) {
+ gf_smsg(this->name, GF_LOG_WARNING, 0, BRS_MSG_SPAWN_SIGN_THRD_FAILED,
+ NULL);
goto cleanup_lock;
+ }
ret = br_stub_bad_object_container_init(this, priv);
if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, BRS_MSG_BAD_CONTAINER_FAIL,
- "failed to launch the thread for storing bad gfids");
+ gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_BAD_CONTAINER_FAIL, NULL);
goto cleanup_lock;
}
@@ -183,6 +193,7 @@ cleanup_lock:
pthread_mutex_destroy(&priv->lock);
free_mempool:
mem_pool_destroy(priv->local_pool);
+ priv->local_pool = NULL;
free_priv:
GF_FREE(priv);
this->private = NULL;
@@ -211,10 +222,62 @@ reconfigure(xlator_t *this, dict_t *options)
priv = this->private;
- GF_OPTION_RECONF("bitrot", priv->do_versioning, options, bool, out);
+ GF_OPTION_RECONF("bitrot", priv->do_versioning, options, bool, err);
+ if (priv->do_versioning && !priv->signth) {
+ ret = gf_thread_create(&priv->signth, NULL, br_stub_signth, this,
+ "brssign");
+ if (ret != 0) {
+ gf_smsg(this->name, GF_LOG_WARNING, 0,
+ BRS_MSG_SPAWN_SIGN_THRD_FAILED, NULL);
+ goto err;
+ }
+
+ ret = br_stub_bad_object_container_init(this, priv);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_BAD_CONTAINER_FAIL,
+ NULL);
+ goto err;
+ }
+ } else {
+ if (priv->signth) {
+ if (gf_thread_cleanup_xint(priv->signth)) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0,
+ BRS_MSG_CANCEL_SIGN_THREAD_FAILED, NULL);
+ } else {
+ gf_smsg(this->name, GF_LOG_INFO, 0, BRS_MSG_KILL_SIGN_THREAD,
+ NULL);
+ priv->signth = 0;
+ }
+ }
+
+ if (priv->container.thread) {
+ if (gf_thread_cleanup_xint(priv->container.thread)) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0,
+ BRS_MSG_CANCEL_SIGN_THREAD_FAILED, NULL);
+ }
+ priv->container.thread = 0;
+ }
+ }
ret = 0;
-out:
+ return ret;
+err:
+ if (priv->signth) {
+ if (gf_thread_cleanup_xint(priv->signth)) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0,
+ BRS_MSG_CANCEL_SIGN_THREAD_FAILED, NULL);
+ }
+ priv->signth = 0;
+ }
+
+ if (priv->container.thread) {
+ if (gf_thread_cleanup_xint(priv->container.thread)) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0,
+ BRS_MSG_CANCEL_SIGN_THREAD_FAILED, NULL);
+ }
+ priv->container.thread = 0;
+ }
+ ret = -1;
return ret;
}
@@ -245,10 +308,13 @@ fini(xlator_t *this)
if (!priv)
return;
+ if (!priv->do_versioning)
+ goto cleanup;
+
ret = gf_thread_cleanup_xint(priv->signth);
if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, BRS_MSG_CANCEL_SIGN_THREAD_FAILED,
- "Could not cancel sign serializer thread");
+ gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_CANCEL_SIGN_THREAD_FAILED,
+ NULL);
goto out;
}
priv->signth = 0;
@@ -262,13 +328,10 @@ fini(xlator_t *this)
GF_FREE(sigstub);
}
- pthread_mutex_destroy(&priv->lock);
- pthread_cond_destroy(&priv->cond);
-
ret = gf_thread_cleanup_xint(priv->container.thread);
if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, BRS_MSG_CANCEL_SIGN_THREAD_FAILED,
- "Could not cancel sign serializer thread");
+ gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_CANCEL_SIGN_THREAD_FAILED,
+ NULL);
goto out;
}
@@ -280,14 +343,18 @@ fini(xlator_t *this)
call_stub_destroy(stub);
}
+ pthread_mutex_destroy(&priv->container.bad_lock);
+ pthread_cond_destroy(&priv->container.bad_cond);
+
+cleanup:
+ pthread_mutex_destroy(&priv->lock);
+ pthread_cond_destroy(&priv->cond);
+
if (priv->local_pool) {
mem_pool_destroy(priv->local_pool);
priv->local_pool = NULL;
}
- pthread_mutex_destroy(&priv->container.bad_lock);
- pthread_cond_destroy(&priv->container.bad_cond);
-
this->private = NULL;
GF_FREE(priv);
@@ -357,8 +424,8 @@ br_stub_prepare_version_request(xlator_t *this, dict_t *dict,
priv = this->private;
br_set_ongoingversion(obuf, oversion, priv->boot);
- return dict_set_static_bin(dict, BITROT_CURRENT_VERSION_KEY, (void *)obuf,
- sizeof(br_version_t));
+ return dict_set_bin(dict, BITROT_CURRENT_VERSION_KEY, (void *)obuf,
+ sizeof(br_version_t));
}
static int
@@ -369,8 +436,7 @@ br_stub_prepare_signing_request(dict_t *dict, br_signature_t *sbuf,
br_set_signature(sbuf, sign, signaturelen, &size);
- return dict_set_static_bin(dict, BITROT_SIGNING_VERSION_KEY, (void *)sbuf,
- size);
+ return dict_set_bin(dict, BITROT_SIGNING_VERSION_KEY, (void *)sbuf, size);
}
/**
@@ -510,11 +576,9 @@ br_stub_need_versioning(xlator_t *this, fd_t *fd, gf_boolean_t *versioning,
ret = br_stub_init_inode_versions(this, fd, fd->inode, version,
_gf_true, _gf_false, &ctx_addr);
if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0,
- BRS_MSG_GET_INODE_CONTEXT_FAILED,
- "failed to "
- " init the inode context for the inode %s",
- uuid_utoa(fd->inode->gfid));
+ gf_smsg(this->name, GF_LOG_ERROR, 0,
+ BRS_MSG_GET_INODE_CONTEXT_FAILED, "gfid=%s",
+ uuid_utoa(fd->inode->gfid), NULL);
goto error_return;
}
}
@@ -548,10 +612,8 @@ br_stub_anon_fd_ctx(xlator_t *this, fd_t *fd, br_stub_inode_ctx_t *ctx)
if (!br_stub_fd) {
ret = br_stub_add_fd_to_inode(this, fd, ctx);
if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, BRS_MSG_ADD_FD_TO_INODE,
- "failed to add fd to "
- "the inode (gfid: %s)",
- uuid_utoa(fd->inode->gfid));
+ gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_ADD_FD_TO_INODE,
+ "gfid=%s", uuid_utoa(fd->inode->gfid), NULL);
goto out;
}
}
@@ -571,9 +633,8 @@ br_stub_versioning_prep(call_frame_t *frame, xlator_t *this, fd_t *fd,
local = br_stub_alloc_local(this);
if (!local) {
- gf_msg(this->name, GF_LOG_ERROR, ENOMEM, BRS_MSG_NO_MEMORY,
- "local allocation failed (gfid: %s)",
- uuid_utoa(fd->inode->gfid));
+ gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, BRS_MSG_NO_MEMORY, "gfid=%s",
+ uuid_utoa(fd->inode->gfid), NULL);
goto error_return;
}
@@ -643,8 +704,8 @@ br_stub_check_bad_object(xlator_t *this, inode_t *inode, int32_t *op_ret,
ret = br_stub_is_bad_object(this, inode);
if (ret == -2) {
- gf_msg(this->name, GF_LOG_ERROR, 0, BRS_MSG_BAD_OBJECT_ACCESS,
- "%s is a bad object. Returning", uuid_utoa(inode->gfid));
+ gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_BAD_OBJECT_ACCESS,
+ "gfid=%s", uuid_utoa(inode->gfid), NULL);
*op_ret = -1;
*op_errno = EIO;
}
@@ -653,9 +714,9 @@ br_stub_check_bad_object(xlator_t *this, inode_t *inode, int32_t *op_ret,
ret = br_stub_init_inode_versions(this, NULL, inode, version, _gf_true,
_gf_false, NULL);
if (ret) {
- gf_msg(
- this->name, GF_LOG_ERROR, 0, BRS_MSG_GET_INODE_CONTEXT_FAILED,
- "failed to init inode context for %s", uuid_utoa(inode->gfid));
+ gf_smsg(this->name, GF_LOG_ERROR, 0,
+ BRS_MSG_GET_INODE_CONTEXT_FAILED, "gfid=%s",
+ uuid_utoa(inode->gfid), NULL);
*op_ret = -1;
*op_errno = EINVAL;
}
@@ -792,23 +853,27 @@ br_stub_perform_incversioning(xlator_t *this, call_frame_t *frame,
op_errno = ENOMEM;
dict = dict_new();
if (!dict)
- goto done;
+ goto out;
ret = br_stub_alloc_versions(&obuf, NULL, 0);
- if (ret)
- goto dealloc_dict;
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_ALLOC_MEM_FAILED,
+ "gfid=%s", uuid_utoa(fd->inode->gfid), NULL);
+ goto out;
+ }
ret = br_stub_prepare_version_request(this, dict, obuf, writeback_version);
- if (ret)
- goto dealloc_versions;
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_VERSION_PREPARE_FAIL,
+ "gfid=%s", uuid_utoa(fd->inode->gfid), NULL);
+ br_stub_dealloc_versions(obuf);
+ goto out;
+ }
ret = br_stub_fd_versioning(
this, frame, stub, dict, fd, br_stub_fd_incversioning_cbk,
writeback_version, BR_STUB_INCREMENTAL_VERSIONING, !WRITEBACK_DURABLE);
-
-dealloc_versions:
- br_stub_dealloc_versions(obuf);
-dealloc_dict:
- dict_unref(dict);
-done:
+out:
+ if (dict)
+ dict_unref(dict);
if (ret) {
if (local)
frame->local = NULL;
@@ -846,6 +911,24 @@ br_stub_signth(void *arg)
THIS = this;
while (1) {
+ /*
+ * Disabling bit-rot feature leads to this particular thread
+ * getting cleaned up by reconfigure via a call to the function
+ * gf_thread_cleanup_xint (which in turn calls pthread_cancel
+ * and pthread_join). But, if this thread had held the mutex
+ * &priv->lock at the time of cancellation, then it leads to
+ * deadlock in future when bit-rot feature is enabled (which
+ * again spawns this thread which can't hold the lock as the
+ * mutex is still held by the previous instance of the thread
+ * which got killed). Also, the br_stub_handle_object_signature
+ * function which is called whenever file has to be signed
+ * also gets blocked as it too attempts to acquire &priv->lock.
+ *
+ * So, arrange for the lock to be unlocked as part of the
+ * cleanup of this thread using pthread_cleanup_push and
+ * pthread_cleanup_pop.
+ */
+ pthread_cleanup_push(br_stub_lock_cleaner, &priv->lock);
pthread_mutex_lock(&priv->lock);
{
while (list_empty(&priv->squeue))
@@ -856,6 +939,7 @@ br_stub_signth(void *arg)
list_del_init(&sigstub->list);
}
pthread_mutex_unlock(&priv->lock);
+ pthread_cleanup_pop(0);
call_resume(sigstub->stub);
@@ -931,10 +1015,9 @@ br_stub_compare_sign_version(xlator_t *this, inode_t *inode,
if (invalid) {
ret = -1;
- gf_msg(this->name, GF_LOG_WARNING, 0, BRS_MSG_SIGN_VERSION_ERROR,
- "Signing version exceeds "
- "current version [%lu > %lu]",
- sbuf->signedversion, ctx->currentversion);
+ gf_smsg(this->name, GF_LOG_WARNING, 0, BRS_MSG_SIGN_VERSION_ERROR,
+ "Signing-ver=%lu", sbuf->signedversion, "current-ver=%lu",
+ ctx->currentversion, NULL);
}
out:
@@ -945,31 +1028,36 @@ static int
br_stub_prepare_signature(xlator_t *this, dict_t *dict, inode_t *inode,
br_isignature_t *sign, int *fakesuccess)
{
- int32_t ret = 0;
+ int32_t ret = -1;
size_t signaturelen = 0;
br_signature_t *sbuf = NULL;
if (!br_is_signature_type_valid(sign->signaturetype))
- goto error_return;
+ goto out;
signaturelen = sign->signaturelen;
ret = br_stub_alloc_versions(NULL, &sbuf, signaturelen);
- if (ret)
- goto error_return;
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_ALLOC_MEM_FAILED,
+ "gfid=%s", uuid_utoa(inode->gfid), NULL);
+ ret = -1;
+ goto out;
+ }
ret = br_stub_prepare_signing_request(dict, sbuf, sign, signaturelen);
- if (ret)
- goto dealloc_versions;
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_SIGN_PREPARE_FAIL,
+ "gfid=%s", uuid_utoa(inode->gfid), NULL);
+ ret = -1;
+ br_stub_dealloc_versions(sbuf);
+ goto out;
+ }
+ /* At this point sbuf has been added to dict, so the memory will be freed
+ * when the data from the dict is destroyed
+ */
ret = br_stub_compare_sign_version(this, inode, sbuf, dict, fakesuccess);
- if (ret)
- goto dealloc_versions;
-
- return 0;
-
-dealloc_versions:
- br_stub_dealloc_versions(sbuf);
-error_return:
- return -1;
+out:
+ return ret;
}
static void
@@ -986,12 +1074,18 @@ br_stub_handle_object_signature(call_frame_t *frame, xlator_t *this, fd_t *fd,
priv = this->private;
- if (frame->root->pid != GF_CLIENT_PID_BITD)
+ if (frame->root->pid != GF_CLIENT_PID_BITD) {
+ gf_smsg(this->name, GF_LOG_WARNING, op_errno, BRS_MSG_NON_BITD_PID,
+ "PID=%d", frame->root->pid, NULL);
goto dofop;
+ }
ret = br_stub_prepare_signature(this, dict, fd->inode, sign, &fakesuccess);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_WARNING, 0, BRS_MSG_SIGN_PREPARE_FAIL,
+ "gfid=%s", uuid_utoa(fd->inode->gfid), NULL);
goto dofop;
+ }
if (fakesuccess) {
op_ret = op_errno = 0;
goto dofop;
@@ -1141,10 +1235,8 @@ br_stub_handle_object_reopen(call_frame_t *frame, xlator_t *this, fd_t *fd,
stub = fop_fsetxattr_cbk_stub(frame, br_stub_fsetxattr_resume, 0, 0, NULL);
if (!stub) {
- gf_msg(this->name, GF_LOG_ERROR, 0, BRS_MSG_STUB_ALLOC_FAILED,
- "failed to allocate stub for fsetxattr fop (gfid: %s),"
- " unwinding",
- uuid_utoa(fd->inode->gfid));
+ gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_STUB_ALLOC_FAILED,
+ "fsetxattr gfid=%s", uuid_utoa(fd->inode->gfid), NULL);
goto cleanup_local;
}
@@ -1198,9 +1290,8 @@ br_stub_fsetxattr_bad_object_cbk(call_frame_t *frame, void *cookie,
*/
ret = br_stub_mark_object_bad(this, local->u.context.inode);
if (ret)
- gf_msg(this->name, GF_LOG_ERROR, 0, BRS_MSG_BAD_OBJ_MARK_FAIL,
- "failed to mark object %s as bad",
- uuid_utoa(local->u.context.inode->gfid));
+ gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_BAD_OBJ_MARK_FAIL,
+ "gfid=%s", uuid_utoa(local->u.context.inode->gfid), NULL);
ret = br_stub_add(this, local->u.context.inode->gfid);
@@ -1220,18 +1311,15 @@ br_stub_handle_bad_object_key(call_frame_t *frame, xlator_t *this, fd_t *fd,
int32_t op_errno = EINVAL;
if (frame->root->pid != GF_CLIENT_PID_SCRUB) {
- gf_msg(this->name, GF_LOG_ERROR, 0, BRS_MSG_NON_SCRUB_BAD_OBJ_MARK,
- "bad object marking "
- "on %s is not from the scrubber",
- uuid_utoa(fd->inode->gfid));
+ gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_NON_SCRUB_BAD_OBJ_MARK,
+ "gfid=%s", uuid_utoa(fd->inode->gfid), NULL);
goto unwind;
}
local = br_stub_alloc_local(this);
if (!local) {
- gf_msg(this->name, GF_LOG_ERROR, 0, BRS_MSG_NO_MEMORY,
- "failed to allocate memory for fsetxattr on %s",
- uuid_utoa(fd->inode->gfid));
+ gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_ALLOC_MEM_FAILED,
+ "fsetxattr gfid=%s", uuid_utoa(fd->inode->gfid), NULL);
op_ret = -1;
op_errno = ENOMEM;
goto unwind;
@@ -1270,10 +1358,9 @@ br_stub_handle_internal_xattr(call_frame_t *frame, xlator_t *this, fd_t *fd,
int32_t op_ret = -1;
int32_t op_errno = EINVAL;
- gf_msg(this->name, GF_LOG_ERROR, 0, BRS_MSG_SET_INTERNAL_XATTR,
- "setxattr called"
- " on the internal xattr %s for inode %s",
- key, uuid_utoa(fd->inode->gfid));
+ gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_SET_INTERNAL_XATTR,
+ "setxattr key=%s", key, "inode-gfid=%s", uuid_utoa(fd->inode->gfid),
+ NULL);
STACK_UNWIND_STRICT(fsetxattr, frame, op_ret, op_errno, NULL);
return 0;
@@ -1291,10 +1378,8 @@ br_stub_dump_xattr(xlator_t *this, dict_t *dict, int *op_errno)
goto out;
}
dict_dump_to_str(dict, dump, BR_STUB_DUMP_STR_SIZE, format);
- gf_msg(this->name, GF_LOG_ERROR, 0, BRS_MSG_SET_INTERNAL_XATTR,
- "fsetxattr called on "
- "internal xattr %s",
- dump);
+ gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_SET_INTERNAL_XATTR,
+ "fsetxattr dump=%s", dump, NULL);
out:
if (dump) {
GF_FREE(dump);
@@ -1331,6 +1416,8 @@ br_stub_fsetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict,
/* object signature request */
ret = dict_get_bin(dict, GLUSTERFS_SET_OBJECT_SIGNATURE, (void **)&sign);
if (!ret) {
+ gf_msg_debug(this->name, 0, "got SIGNATURE request on %s",
+ uuid_utoa(fd->inode->gfid));
br_stub_handle_object_signature(frame, this, fd, dict, sign, xdata);
goto done;
}
@@ -1423,10 +1510,8 @@ br_stub_removexattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
if (!strcmp(BITROT_OBJECT_BAD_KEY, name) ||
!strcmp(BITROT_SIGNING_VERSION_KEY, name) ||
!strcmp(BITROT_CURRENT_VERSION_KEY, name)) {
- gf_msg(this->name, GF_LOG_WARNING, 0, BRS_MSG_REMOVE_INTERNAL_XATTR,
- "removexattr called"
- " on internal xattr %s for file %s",
- name, loc->path);
+ gf_smsg(this->name, GF_LOG_WARNING, 0, BRS_MSG_REMOVE_INTERNAL_XATTR,
+ "name=%s", name, "file-path=%s", loc->path, NULL);
goto unwind;
}
@@ -1448,10 +1533,9 @@ br_stub_fremovexattr(call_frame_t *frame, xlator_t *this, fd_t *fd,
if (!strcmp(BITROT_OBJECT_BAD_KEY, name) ||
!strcmp(BITROT_SIGNING_VERSION_KEY, name) ||
!strcmp(BITROT_CURRENT_VERSION_KEY, name)) {
- gf_msg(this->name, GF_LOG_WARNING, 0, BRS_MSG_REMOVE_INTERNAL_XATTR,
- "removexattr called"
- " on internal xattr %s for inode %s",
- name, uuid_utoa(fd->inode->gfid));
+ gf_smsg(this->name, GF_LOG_WARNING, 0, BRS_MSG_REMOVE_INTERNAL_XATTR,
+ "name=%s", name, "inode-gfid=%s", uuid_utoa(fd->inode->gfid),
+ NULL);
goto unwind;
}
@@ -1476,7 +1560,7 @@ br_stub_listxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
if (op_ret < 0)
goto unwind;
- br_stub_remove_vxattrs(xattr);
+ br_stub_remove_vxattrs(xattr, _gf_true);
unwind:
STACK_UNWIND_STRICT(getxattr, frame, op_ret, op_errno, xattr, xdata);
@@ -1537,10 +1621,8 @@ br_stub_is_object_stale(xlator_t *this, call_frame_t *frame, inode_t *inode,
ret = br_stub_get_inode_ctx(this, inode, &ctx_addr);
if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, BRS_MSG_GET_INODE_CONTEXT_FAILED,
- "failed to get the "
- "inode context for %s",
- uuid_utoa(inode->gfid));
+ gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_GET_INODE_CONTEXT_FAILED,
+ "gfid=%s", uuid_utoa(inode->gfid), NULL);
goto out;
}
@@ -1655,7 +1737,7 @@ br_stub_getxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
op_ret = totallen;
delkeys:
- br_stub_remove_vxattrs(xattr);
+ br_stub_remove_vxattrs(xattr, _gf_true);
unwind:
STACK_UNWIND_STRICT(getxattr, frame, op_ret, op_errno, xattr, xdata);
@@ -1711,9 +1793,7 @@ br_stub_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
const char *name, dict_t *xdata)
{
void *cookie = NULL;
- uuid_t rootgfid = {
- 0,
- };
+ static uuid_t rootgfid = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1};
fop_getxattr_cbk_t cbk = br_stub_getxattr_cbk;
int32_t op_ret = -1;
int32_t op_errno = EINVAL;
@@ -1725,8 +1805,6 @@ br_stub_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
GF_VALIDATE_OR_GOTO(this->name, this->private, unwind);
GF_VALIDATE_OR_GOTO(this->name, loc->inode, unwind);
- rootgfid[15] = 1;
-
if (!name) {
cbk = br_stub_listxattr_cbk;
goto wind;
@@ -1796,16 +1874,13 @@ br_stub_fgetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd,
const char *name, dict_t *xdata)
{
void *cookie = NULL;
- uuid_t rootgfid = {
- 0,
- };
+ static uuid_t rootgfid = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1};
fop_fgetxattr_cbk_t cbk = br_stub_getxattr_cbk;
int32_t op_ret = -1;
int32_t op_errno = EINVAL;
br_stub_local_t *local = NULL;
br_stub_private_t *priv = NULL;
- rootgfid[15] = 1;
priv = this->private;
if (!name) {
@@ -2025,10 +2100,8 @@ br_stub_writev(call_frame_t *frame, xlator_t *this, fd_t *fd,
offset, flags, iobref, xdata);
if (!stub) {
- gf_msg(this->name, GF_LOG_ERROR, 0, BRS_MSG_STUB_ALLOC_FAILED,
- "failed to allocate stub for write fop (gfid: %s), "
- "unwinding",
- uuid_utoa(fd->inode->gfid));
+ gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_STUB_ALLOC_FAILED,
+ "write gfid=%s", uuid_utoa(fd->inode->gfid), NULL);
goto cleanup_local;
}
@@ -2141,10 +2214,8 @@ br_stub_ftruncate(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
stub = fop_ftruncate_stub(frame, br_stub_ftruncate_resume, fd, offset,
xdata);
if (!stub) {
- gf_msg(this->name, GF_LOG_ERROR, 0, BRS_MSG_STUB_ALLOC_FAILED,
- "failed to allocate stub for ftruncate fop (gfid: %s),"
- " unwinding",
- uuid_utoa(fd->inode->gfid));
+ gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_STUB_ALLOC_FAILED,
+ "ftruncate gfid=%s", uuid_utoa(fd->inode->gfid), NULL);
goto cleanup_local;
}
@@ -2248,10 +2319,8 @@ br_stub_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset,
fd = fd_anonymous(loc->inode);
if (!fd) {
- gf_msg(this->name, GF_LOG_ERROR, 0, BRS_MSG_CREATE_ANONYMOUS_FD_FAILED,
- "failed to create "
- "anonymous fd for the inode %s",
- uuid_utoa(loc->inode->gfid));
+ gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_CREATE_ANONYMOUS_FD_FAILED,
+ "inode-gfid=%s", uuid_utoa(loc->inode->gfid), NULL);
goto unwind;
}
@@ -2281,10 +2350,8 @@ br_stub_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset,
stub = fop_truncate_stub(frame, br_stub_truncate_resume, loc, offset,
xdata);
if (!stub) {
- gf_msg(this->name, GF_LOG_ERROR, 0, BRS_MSG_STUB_ALLOC_FAILED,
- "failed to allocate stub for truncate fop (gfid: %s), "
- "unwinding",
- uuid_utoa(fd->inode->gfid));
+ gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_STUB_ALLOC_FAILED,
+ "truncate gfid=%s", uuid_utoa(fd->inode->gfid), NULL);
goto cleanup_local;
}
@@ -2357,11 +2424,9 @@ br_stub_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
ret = br_stub_init_inode_versions(this, fd, fd->inode, version,
_gf_true, _gf_false, &ctx_addr);
if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0,
- BRS_MSG_GET_INODE_CONTEXT_FAILED,
- "failed to init the inode context for "
- "the file %s (gfid: %s)",
- loc->path, uuid_utoa(fd->inode->gfid));
+ gf_smsg(this->name, GF_LOG_ERROR, 0,
+ BRS_MSG_GET_INODE_CONTEXT_FAILED, "path=%s", loc->path,
+ "gfid=%s", uuid_utoa(fd->inode->gfid), NULL);
goto unwind;
}
}
@@ -2380,9 +2445,8 @@ br_stub_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
ret = br_stub_add_fd_to_inode(this, fd, ctx);
if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, BRS_MSG_ADD_FD_TO_LIST_FAILED,
- "failed add fd to the list (gfid: %s)",
- uuid_utoa(fd->inode->gfid));
+ gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_ADD_FD_TO_LIST_FAILED,
+ "gfid=%s", uuid_utoa(fd->inode->gfid), NULL);
goto unwind;
}
@@ -2413,10 +2477,8 @@ br_stub_add_fd_to_inode(xlator_t *this, fd_t *fd, br_stub_inode_ctx_t *ctx)
ret = br_stub_require_release_call(this, fd, &br_stub_fd);
if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, BRS_MSG_SET_FD_CONTEXT_FAILED,
- "failed to set the fd "
- "context for the file (gfid: %s)",
- uuid_utoa(fd->inode->gfid));
+ gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_SET_FD_CONTEXT_FAILED,
+ "gfid=%s", uuid_utoa(fd->inode->gfid), NULL);
goto out;
}
@@ -2703,17 +2765,37 @@ br_stub_readdirp_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
if (!IA_ISREG(entry->d_stat.ia_type))
continue;
+ /*
+ * Readdirp for most part is a bulk lookup for all the entries
+ * present in the directory being read. Ideally, for each
+ * entry, the handling should be similar to that of a lookup
+ * callback. But for now, just keeping this as it has been
+ * until now (which means, this comment has been added much
+ * later as part of a change that wanted to send the flag
+ * of true/false to br_stub_remove_vxattrs to indicate whether
+ * the bad-object xattr should be removed from the entry->dict
+ * or not). Until this change, the function br_stub_remove_vxattrs
+ * was just removing all the xattrs associated with bit-rot-stub
+ * (like version, bad-object, signature etc). But, there are
+ * scenarios where we only want to send bad-object xattr and not
+ * others. So this comment is part of that change which also
+ * mentions another possible change that might be needed
+ * in future.
+ * But for now, adding _gf_true means functionally it's the same as
+ * what this function was doing before. Just remove all the stub
+ * related xattrs.
+ */
ret = br_stub_get_inode_ctx(this, entry->inode, &ctxaddr);
if (ret < 0)
ctxaddr = 0;
if (ctxaddr) { /* already has the context */
- br_stub_remove_vxattrs(entry->dict);
+ br_stub_remove_vxattrs(entry->dict, _gf_true);
continue;
}
ret = br_stub_lookup_version(this, entry->inode->gfid, entry->inode,
entry->dict);
- br_stub_remove_vxattrs(entry->dict);
+ br_stub_remove_vxattrs(entry->dict, _gf_true);
if (ret) {
/**
* there's no per-file granularity support in case of
@@ -2849,13 +2931,22 @@ br_stub_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int32_t ret = 0;
br_stub_private_t *priv = NULL;
gf_boolean_t ver_enabled = _gf_false;
+ gf_boolean_t remove_bad_file_marker = _gf_true;
BR_STUB_VER_ENABLED_IN_CALLPATH(frame, ver_enabled);
priv = this->private;
if (op_ret < 0) {
(void)br_stub_handle_lookup_error(this, inode, op_errno);
- goto unwind;
+
+ /*
+ * If the lookup error is not ENOENT, then it is better
+ * to send the bad file marker to the higher layer (if
+ * it has been set)
+ */
+ if (op_errno != ENOENT)
+ remove_bad_file_marker = _gf_false;
+ goto delkey;
}
BR_STUB_VER_COND_GOTO(priv, (!ver_enabled), delkey);
@@ -2876,7 +2967,13 @@ br_stub_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
if (ret) {
op_ret = -1;
op_errno = EIO;
- goto unwind;
+ /*
+ * This flag ensures that in the label @delkey below,
+ * bad file marker is not removed from the dictionary,
+ * but other virtual xattrs (such as version, signature)
+ * are removed.
+ */
+ remove_bad_file_marker = _gf_false;
}
goto delkey;
}
@@ -2900,11 +2997,11 @@ br_stub_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
*/
op_ret = -1;
op_errno = EIO;
- goto unwind;
+ goto delkey;
}
delkey:
- br_stub_remove_vxattrs(xattr);
+ br_stub_remove_vxattrs(xattr, remove_bad_file_marker);
unwind:
STACK_UNWIND_STRICT(lookup, frame, op_ret, op_errno, inode, stbuf, xattr,
postparent);
@@ -3090,8 +3187,7 @@ br_stub_unlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
goto unwind;
if (!local) {
- gf_msg(this->name, GF_LOG_WARNING, 0, BRS_MSG_NULL_LOCAL,
- "local is NULL");
+ gf_smsg(this->name, GF_LOG_WARNING, 0, BRS_MSG_NULL_LOCAL, NULL);
goto unwind;
}
inode = local->u.context.inode;
@@ -3109,9 +3205,8 @@ br_stub_unlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
* has to be removed manually. Its not a good idea to fail
* the fop, as the object has already been deleted.
*/
- gf_msg(this->name, GF_LOG_WARNING, 0, BRS_MSG_GET_INODE_CONTEXT_FAILED,
- "failed to get the context for the inode %s",
- uuid_utoa(inode->gfid));
+ gf_smsg(this->name, GF_LOG_WARNING, 0, BRS_MSG_GET_INODE_CONTEXT_FAILED,
+ "inode-gfid=%s", uuid_utoa(inode->gfid), NULL);
goto unwind;
}
@@ -3154,9 +3249,9 @@ br_stub_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int flag,
if (!local) {
op_ret = -1;
op_errno = ENOMEM;
- gf_msg(this->name, GF_LOG_ERROR, ENOMEM, BRS_MSG_NO_MEMORY,
- "failed to allocate memory for local (path: %s, gfid: %s)",
- loc->path, uuid_utoa(loc->inode->gfid));
+ gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, BRS_MSG_ALLOC_MEM_FAILED,
+ "local path=%s", loc->path, "gfid=%s",
+ uuid_utoa(loc->inode->gfid), NULL);
goto unwind;
}
@@ -3231,23 +3326,21 @@ br_stub_send_ipc_fop(xlator_t *this, fd_t *fd, unsigned long releaseversion,
xdata = dict_new();
if (!xdata) {
- gf_msg(this->name, GF_LOG_WARNING, ENOMEM, BRS_MSG_NO_MEMORY,
- "dict allocation failed: cannot send IPC FOP "
- "to changelog");
+ gf_smsg(this->name, GF_LOG_WARNING, ENOMEM, BRS_MSG_DICT_ALLOC_FAILED,
+ NULL);
goto out;
}
ret = dict_set_static_bin(xdata, "RELEASE-EVENT", &ev, CHANGELOG_EV_SIZE);
if (ret) {
- gf_msg(this->name, GF_LOG_WARNING, 0, BRS_MSG_SET_EVENT_FAILED,
- "cannot set release event in dict");
+ gf_smsg(this->name, GF_LOG_WARNING, 0, BRS_MSG_SET_EVENT_FAILED, NULL);
goto dealloc_dict;
}
frame = create_frame(this, this->ctx->pool);
if (!frame) {
- gf_msg(this->name, GF_LOG_WARNING, 0, BRS_MSG_CREATE_FRAME_FAILED,
- "create_frame() failure");
+ gf_smsg(this->name, GF_LOG_WARNING, 0, BRS_MSG_CREATE_FRAME_FAILED,
+ NULL);
goto dealloc_dict;
}
@@ -3382,8 +3475,8 @@ br_stub_releasedir(xlator_t *this, fd_t *fd)
if (fctx->bad_object.dir) {
ret = sys_closedir(fctx->bad_object.dir);
if (ret)
- gf_msg(this->name, GF_LOG_ERROR, 0, BRS_MSG_BAD_OBJ_DIR_CLOSE_FAIL,
- "closedir error: %s", strerror(errno));
+ gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_BAD_OBJ_DIR_CLOSE_FAIL,
+ "error=%s", strerror(errno), NULL);
}
GF_FREE(fctx);
@@ -3481,3 +3574,17 @@ struct volume_options options[] = {
.default_value = "{{ brick.path }}"},
{.key = {NULL}},
};
+
+xlator_api_t xlator_api = {
+ .init = init,
+ .fini = fini,
+ .notify = notify,
+ .reconfigure = reconfigure,
+ .mem_acct_init = mem_acct_init,
+ .op_version = {1}, /* Present from the initial version */
+ .fops = &fops,
+ .cbks = &cbks,
+ .options = options,
+ .identifier = "bitrot-stub",
+ .category = GF_MAINTAINED,
+};
diff --git a/xlators/features/bit-rot/src/stub/bit-rot-stub.h b/xlators/features/bit-rot/src/stub/bit-rot-stub.h
index 9e6492ebdd7..edd79a77e4f 100644
--- a/xlators/features/bit-rot/src/stub/bit-rot-stub.h
+++ b/xlators/features/bit-rot/src/stub/bit-rot-stub.h
@@ -10,20 +10,20 @@
#ifndef __BIT_ROT_STUB_H__
#define __BIT_ROT_STUB_H__
-#include "glusterfs.h"
-#include "logging.h"
-#include "dict.h"
-#include "xlator.h"
-#include "defaults.h"
-#include "call-stub.h"
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/logging.h>
+#include <glusterfs/dict.h>
+#include <glusterfs/xlator.h>
+#include <glusterfs/defaults.h>
+#include <glusterfs/call-stub.h>
#include "bit-rot-stub-mem-types.h"
-#include "syscall.h"
-#include "common-utils.h"
+#include <glusterfs/syscall.h>
+#include <glusterfs/common-utils.h>
#include "bit-rot-common.h"
#include "bit-rot-stub-messages.h"
#include "glusterfs3-xdr.h"
-#include "syncop.h"
-#include "syncop-utils.h"
+#include <glusterfs/syncop.h>
+#include <glusterfs/syncop-utils.h>
#define BAD_OBJECT_THREAD_STACK_SIZE ((size_t)(1024 * 1024))
#define BR_STUB_DUMP_STR_SIZE 65536
@@ -222,8 +222,8 @@ br_stub_require_release_call(xlator_t *this, fd_t *fd, br_stub_fd_t **fd_ctx)
ret = br_stub_fd_ctx_set(this, fd, br_stub_fd);
if (ret)
- gf_msg(this->name, GF_LOG_WARNING, 0, BRS_MSG_SET_CONTEXT_FAILED,
- "could not set fd context (for release callback");
+ gf_smsg(this->name, GF_LOG_WARNING, 0, BRS_MSG_SET_CONTEXT_FAILED,
+ NULL);
else
*fd_ctx = br_stub_fd;
@@ -273,10 +273,9 @@ __br_stub_set_ongoing_version(br_stub_inode_ctx_t *ctx, unsigned long version)
if (ctx->currentversion < version)
ctx->currentversion = version;
else
- gf_msg("bit-rot-stub", GF_LOG_WARNING, 0, BRS_MSG_CHANGE_VERSION_FAILED,
- "current version: %lu"
- "new version: %lu",
- ctx->currentversion, version);
+ gf_smsg("bit-rot-stub", GF_LOG_WARNING, 0,
+ BRS_MSG_CHANGE_VERSION_FAILED, "current version=%lu",
+ ctx->currentversion, "new version=%lu", version, NULL);
}
static inline int
@@ -359,10 +358,18 @@ br_stub_is_internal_xattr(const char *name)
}
static inline void
-br_stub_remove_vxattrs(dict_t *xattr)
+br_stub_remove_vxattrs(dict_t *xattr, gf_boolean_t remove_bad_marker)
{
if (xattr) {
- dict_del(xattr, BITROT_OBJECT_BAD_KEY);
+ /*
+ * When a file is corrupted, bad-object should be
+ * set in the dict. But, other info such as version,
+ * signature etc should not be set. Hence the flag
+ * remove_bad_marker. The consumer should know whether
+ * to send the bad-object info in the dict or not.
+ */
+ if (remove_bad_marker)
+ dict_del(xattr, BITROT_OBJECT_BAD_KEY);
dict_del(xattr, BITROT_CURRENT_VERSION_KEY);
dict_del(xattr, BITROT_SIGNING_VERSION_KEY);
dict_del(xattr, BITROT_SIGNING_XATTR_SIZE_KEY);
@@ -390,9 +397,8 @@ br_stub_is_bad_object(xlator_t *this, inode_t *inode)
ret = br_stub_get_inode_ctx(this, inode, &ctx_addr);
if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, BRS_MSG_GET_INODE_CONTEXT_FAILED,
- "failed to get the inode context for the inode %s",
- uuid_utoa(inode->gfid));
+ gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_GET_INODE_CONTEXT_FAILED,
+ "inode-gfid=%s", uuid_utoa(inode->gfid), NULL);
bad_object = -1;
goto out;
}
@@ -420,10 +426,8 @@ br_stub_mark_object_bad(xlator_t *this, inode_t *inode)
ret = br_stub_get_inode_ctx(this, inode, &ctx_addr);
if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, BRS_MSG_GET_INODE_CONTEXT_FAILED,
- "failed to get the "
- "inode context for the inode %s",
- uuid_utoa(inode->gfid));
+ gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_GET_INODE_CONTEXT_FAILED,
+ "inode-gfid=%s", uuid_utoa(inode->gfid), NULL);
goto out;
}
diff --git a/xlators/features/changelog/lib/examples/python/libgfchangelog.py b/xlators/features/changelog/lib/examples/python/libgfchangelog.py
index 2cdbf1152b9..2da9f2d2a8c 100644
--- a/xlators/features/changelog/lib/examples/python/libgfchangelog.py
+++ b/xlators/features/changelog/lib/examples/python/libgfchangelog.py
@@ -1,8 +1,10 @@
import os
from ctypes import *
+from ctypes.util import find_library
class Changes(object):
- libgfc = CDLL("libgfchangelog.so", mode=RTLD_GLOBAL, use_errno=True)
+ libgfc = CDLL(find_library("gfchangelog"), mode=RTLD_GLOBAL,
+ use_errno=True)
@classmethod
def geterrno(cls):
diff --git a/xlators/features/changelog/lib/src/Makefile.am b/xlators/features/changelog/lib/src/Makefile.am
index c4b9a3df692..c933ec53ed2 100644
--- a/xlators/features/changelog/lib/src/Makefile.am
+++ b/xlators/features/changelog/lib/src/Makefile.am
@@ -1,7 +1,7 @@
libgfchangelog_la_CFLAGS = -Wall $(GF_CFLAGS) $(GF_DARWIN_LIBGLUSTERFS_CFLAGS) \
-DDATADIR=\"$(localstatedir)\"
-libgfchangelog_la_CPPFLAGS = $(GF_CPPFLAGS) -D__USE_FILE_OFFSET64 -fpic \
+libgfchangelog_la_CPPFLAGS = $(GF_CPPFLAGS) -D__USE_FILE_OFFSET64 -D__USE_LARGEFILE64 -fpic \
-I../../../src/ -I$(top_srcdir)/libglusterfs/src \
-I$(top_srcdir)/xlators/features/changelog/src \
-I$(top_srcdir)/rpc/xdr/src -I$(top_builddir)/rpc/xdr/src \
diff --git a/xlators/features/changelog/lib/src/changelog-lib-messages.h b/xlators/features/changelog/lib/src/changelog-lib-messages.h
index 32b3497d89d..d7fe7274353 100644
--- a/xlators/features/changelog/lib/src/changelog-lib-messages.h
+++ b/xlators/features/changelog/lib/src/changelog-lib-messages.h
@@ -11,7 +11,7 @@
#ifndef _CHANGELOG_LIB_MESSAGES_H_
#define _CHANGELOG_LIB_MESSAGES_H_
-#include "glfs-message-id.h"
+#include <glusterfs/glfs-message-id.h>
/* To add new message IDs, append new identifiers at the end of the list.
*
@@ -34,7 +34,7 @@ GLFS_MSGID(
CHANGELOG_LIB_MSG_MMAP_FAILED, CHANGELOG_LIB_MSG_MUNMAP_FAILED,
CHANGELOG_LIB_MSG_ASCII_ERROR, CHANGELOG_LIB_MSG_STAT_FAILED,
CHANGELOG_LIB_MSG_GET_XATTR_FAILED, CHANGELOG_LIB_MSG_PUBLISH_ERROR,
- CHANGELOG_LIB_MSG_PARSE_ERROR, CHANGELOG_LIB_MSG_TOTAL_LOG_INFO,
+ CHANGELOG_LIB_MSG_PARSE_ERROR, CHANGELOG_LIB_MSG_MIN_MAX_INFO,
CHANGELOG_LIB_MSG_CLEANUP_ERROR, CHANGELOG_LIB_MSG_UNLINK_FAILED,
CHANGELOG_LIB_MSG_NOTIFY_REGISTER_FAILED,
CHANGELOG_LIB_MSG_INVOKE_RPC_FAILED, CHANGELOG_LIB_MSG_DRAINING_EVENT_INFO,
@@ -43,6 +43,32 @@ GLFS_MSGID(
CHANGELOG_LIB_MSG_NOTIFY_REGISTER_INFO,
CHANGELOG_LIB_MSG_THREAD_CLEANUP_WARNING,
CHANGELOG_LIB_MSG_COPY_FROM_BUFFER_FAILED,
- CHANGELOG_LIB_MSG_PTHREAD_JOIN_FAILED, CHANGELOG_LIB_MSG_HIST_FAILED);
+ CHANGELOG_LIB_MSG_PTHREAD_JOIN_FAILED, CHANGELOG_LIB_MSG_HIST_FAILED,
+ CHANGELOG_LIB_MSG_DRAINED_EVENT_INFO, CHANGELOG_LIB_MSG_PARSE_ERROR_CEASED,
+ CHANGELOG_LIB_MSG_REQUESTING_INFO, CHANGELOG_LIB_MSG_FINAL_INFO);
+
+#define CHANGELOG_LIB_MSG_NOTIFY_REGISTER_INFO_STR "Registering brick"
+#define CHANGELOG_LIB_MSG_RENAME_FAILED_STR "error moving changelog file"
+#define CHANGELOG_LIB_MSG_OPEN_FAILED_STR "cannot open changelog file"
+#define CHANGELOG_LIB_MSG_UNLINK_FAILED_STR "failed to unlink"
+#define CHANGELOG_LIB_MSG_FAILED_TO_RMDIR_STR "failed to rmdir"
+#define CHANGELOG_LIB_MSG_STAT_FAILED_STR "stat failed on changelog file"
+#define CHANGELOG_LIB_MSG_PARSE_ERROR_STR "could not parse changelog"
+#define CHANGELOG_LIB_MSG_PARSE_ERROR_CEASED_STR \
+ "parsing error, ceased publishing..."
+#define CHANGELOG_LIB_MSG_HTIME_ERROR_STR "fop failed on htime file"
+#define CHANGELOG_LIB_MSG_GET_XATTR_FAILED_STR \
+ "error extracting max timstamp from htime file"
+#define CHANGELOG_LIB_MSG_MIN_MAX_INFO_STR "changelogs min max"
+#define CHANGELOG_LIB_MSG_REQUESTING_INFO_STR "Requesting historical changelogs"
+#define CHANGELOG_LIB_MSG_FINAL_INFO_STR "FINAL"
+#define CHANGELOG_LIB_MSG_HIST_FAILED_STR \
+ "Requested changelog range is not available"
+#define CHANGELOG_LIB_MSG_GET_TIME_ERROR_STR "wrong result"
+#define CHANGELOG_LIB_MSG_CLEANING_BRICK_ENTRY_INFO_STR \
+ "Cleaning brick entry for brick"
+#define CHANGELOG_LIB_MSG_DRAINING_EVENT_INFO_STR "Draining event"
+#define CHANGELOG_LIB_MSG_DRAINED_EVENT_INFO_STR "Drained event"
+#define CHANGELOG_LIB_MSG_FREEING_ENTRY_INFO_STR "freeing entry"
#endif /* !_CHANGELOG_MESSAGES_H_ */
diff --git a/xlators/features/changelog/lib/src/gf-changelog-api.c b/xlators/features/changelog/lib/src/gf-changelog-api.c
index 1b6e932596d..81a5cbfec10 100644
--- a/xlators/features/changelog/lib/src/gf-changelog-api.c
+++ b/xlators/features/changelog/lib/src/gf-changelog-api.c
@@ -8,10 +8,10 @@
cases as published by the Free Software Foundation.
*/
-#include "compat-uuid.h"
-#include "globals.h"
-#include "glusterfs.h"
-#include "syscall.h"
+#include <glusterfs/compat-uuid.h>
+#include <glusterfs/globals.h>
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/syscall.h>
#include "gf-changelog-helpers.h"
#include "gf-changelog-journal.h"
@@ -56,8 +56,8 @@ gf_changelog_done(char *file)
ret = sys_rename(buffer, to_path);
if (ret) {
gf_smsg(this->name, GF_LOG_ERROR, errno,
- CHANGELOG_LIB_MSG_RENAME_FAILED, "cannot move changelog file",
- "from=%s", file, "to=%s", to_path, NULL);
+ CHANGELOG_LIB_MSG_RENAME_FAILED, "from=%s", file, "to=%s",
+ to_path, NULL);
goto out;
}
diff --git a/xlators/features/changelog/lib/src/gf-changelog-helpers.c b/xlators/features/changelog/lib/src/gf-changelog-helpers.c
index fd15ec68ab8..75f8a6dfc08 100644
--- a/xlators/features/changelog/lib/src/gf-changelog-helpers.c
+++ b/xlators/features/changelog/lib/src/gf-changelog-helpers.c
@@ -11,13 +11,7 @@
#include "changelog-mem-types.h"
#include "gf-changelog-helpers.h"
#include "changelog-lib-messages.h"
-#include "syscall.h"
-
-ssize_t
-gf_changelog_read_path(int fd, char *buffer, size_t bufsize)
-{
- return sys_read(fd, buffer, bufsize);
-}
+#include <glusterfs/syscall.h>
size_t
gf_changelog_write(int fd, char *buffer, size_t len)
@@ -64,20 +58,7 @@ gf_rfc3986_encode_space_newline(unsigned char *s, char *enc, char *estr)
* made a part of libglusterfs.
*/
-static pthread_key_t rl_key;
-static pthread_once_t rl_once = PTHREAD_ONCE_INIT;
-
-static void
-readline_destructor(void *ptr)
-{
- GF_FREE(ptr);
-}
-
-static void
-readline_once(void)
-{
- pthread_key_create(&rl_key, readline_destructor);
-}
+static __thread read_line_t thread_tsd = {};
static ssize_t
my_read(read_line_t *tsd, int fd, char *ptr)
@@ -97,27 +78,6 @@ my_read(read_line_t *tsd, int fd, char *ptr)
return 1;
}
-static int
-gf_readline_init_once(read_line_t **tsd)
-{
- if (pthread_once(&rl_once, readline_once) != 0)
- return -1;
-
- *tsd = pthread_getspecific(rl_key);
- if (*tsd)
- goto out;
-
- *tsd = GF_CALLOC(1, sizeof(**tsd), gf_changelog_mt_libgfchangelog_rl_t);
- if (!*tsd)
- return -1;
-
- if (pthread_setspecific(rl_key, *tsd) != 0)
- return -1;
-
-out:
- return 0;
-}
-
ssize_t
gf_readline(int fd, void *vptr, size_t maxlen)
{
@@ -125,10 +85,7 @@ gf_readline(int fd, void *vptr, size_t maxlen)
size_t rc = 0;
char c = ' ';
char *ptr = NULL;
- read_line_t *tsd = NULL;
-
- if (gf_readline_init_once(&tsd))
- return -1;
+ read_line_t *tsd = &thread_tsd;
ptr = vptr;
for (n = 1; n < maxlen; n++) {
@@ -151,10 +108,7 @@ off_t
gf_lseek(int fd, off_t offset, int whence)
{
off_t off = 0;
- read_line_t *tsd = NULL;
-
- if (gf_readline_init_once(&tsd))
- return -1;
+ read_line_t *tsd = &thread_tsd;
off = sys_lseek(fd, offset, whence);
if (off == -1)
@@ -169,10 +123,7 @@ gf_lseek(int fd, off_t offset, int whence)
int
gf_ftruncate(int fd, off_t length)
{
- read_line_t *tsd = NULL;
-
- if (gf_readline_init_once(&tsd))
- return -1;
+ read_line_t *tsd = &thread_tsd;
if (sys_ftruncate(fd, 0))
return -1;
diff --git a/xlators/features/changelog/lib/src/gf-changelog-helpers.h b/xlators/features/changelog/lib/src/gf-changelog-helpers.h
index cfb26a0081e..9c609d33172 100644
--- a/xlators/features/changelog/lib/src/gf-changelog-helpers.h
+++ b/xlators/features/changelog/lib/src/gf-changelog-helpers.h
@@ -14,9 +14,9 @@
#include <unistd.h>
#include <dirent.h>
#include <limits.h>
-#include "locking.h"
+#include <glusterfs/locking.h>
-#include <xlator.h>
+#include <glusterfs/xlator.h>
#include "changelog.h"
@@ -205,9 +205,6 @@ typedef struct gf_private {
void *
gf_changelog_process(void *data);
-ssize_t
-gf_changelog_read_path(int fd, char *buffer, size_t bufsize);
-
void
gf_rfc3986_encode_space_newline(unsigned char *s, char *enc, char *estr);
diff --git a/xlators/features/changelog/lib/src/gf-changelog-journal-handler.c b/xlators/features/changelog/lib/src/gf-changelog-journal-handler.c
index ef46bf50c97..7f6e2329e71 100644
--- a/xlators/features/changelog/lib/src/gf-changelog-journal-handler.c
+++ b/xlators/features/changelog/lib/src/gf-changelog-journal-handler.c
@@ -8,11 +8,11 @@
cases as published by the Free Software Foundation.
*/
-#include "compat-uuid.h"
-#include "globals.h"
-#include "glusterfs.h"
-#include "syscall.h"
-#include "compat-errno.h"
+#include <glusterfs/compat-uuid.h>
+#include <glusterfs/globals.h>
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/syscall.h>
+#include <glusterfs/compat-errno.h>
#include "gf-changelog-helpers.h"
@@ -526,9 +526,8 @@ gf_changelog_publish(xlator_t *this, gf_changelog_journal_t *jnl,
ret = sys_rename(to_path, dest);
if (ret) {
gf_smsg(this->name, GF_LOG_ERROR, errno,
- CHANGELOG_LIB_MSG_RENAME_FAILED,
- "error moving changelog to processing dir", "path=%s", to_path,
- NULL);
+ CHANGELOG_LIB_MSG_RENAME_FAILED, "from=%s", to_path, "to=%s",
+ dest, NULL);
}
out:
@@ -564,14 +563,14 @@ gf_changelog_consume(xlator_t *this, gf_changelog_journal_t *jnl,
if (ret || !S_ISREG(stbuf.st_mode)) {
ret = -1;
gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_LIB_MSG_STAT_FAILED,
- "stat failed on changelog file", "path=%s", from_path, NULL);
+ "path=%s", from_path, NULL);
goto out;
}
fd1 = open(from_path, O_RDONLY);
if (fd1 < 0) {
gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_LIB_MSG_OPEN_FAILED,
- "cannot open changelog file", "path=%s", from_path, NULL);
+ "path=%s", from_path, NULL);
goto out;
}
@@ -579,7 +578,7 @@ gf_changelog_consume(xlator_t *this, gf_changelog_journal_t *jnl,
S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH);
if (fd2 < 0) {
gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_LIB_MSG_OPEN_FAILED,
- "cannot create ascii changelog file", "path=%s", to_path, NULL);
+ "path=%s", to_path, NULL);
goto close_fd;
} else {
ret = gf_changelog_decode(this, jnl, fd1, fd2, &stbuf, &zerob);
@@ -594,9 +593,8 @@ gf_changelog_consume(xlator_t *this, gf_changelog_journal_t *jnl,
ret = sys_rename(to_path, dest);
if (ret)
gf_smsg(this->name, GF_LOG_ERROR, errno,
- CHANGELOG_LIB_MSG_RENAME_FAILED,
- "error moving changelog to processing dir", "path=%s",
- to_path, NULL);
+ CHANGELOG_LIB_MSG_RENAME_FAILED, "from=%s", to_path,
+ "to=%s", dest, NULL);
}
/* remove it from .current if it's an empty file */
@@ -605,9 +603,8 @@ gf_changelog_consume(xlator_t *this, gf_changelog_journal_t *jnl,
ret = sys_unlink(to_path);
if (ret)
gf_smsg(this->name, GF_LOG_ERROR, errno,
- CHANGELOG_LIB_MSG_UNLINK_FAILED,
- "could not unlink empty changelog", "path=%s", to_path,
- NULL);
+ CHANGELOG_LIB_MSG_UNLINK_FAILED, "name=empty changelog",
+ "path=%s", to_path, NULL);
}
}
@@ -828,7 +825,7 @@ gf_changelog_open_dirs(xlator_t *this, gf_changelog_journal_t *jnl)
ret = recursive_rmdir(jnl->jnl_current_dir);
if (ret) {
gf_smsg(this->name, GF_LOG_ERROR, errno,
- CHANGELOG_LIB_MSG_FAILED_TO_RMDIR, "Failed to rmdir", "path=%s",
+ CHANGELOG_LIB_MSG_FAILED_TO_RMDIR, "path=%s",
jnl->jnl_current_dir, NULL);
goto out;
}
@@ -849,7 +846,7 @@ gf_changelog_open_dirs(xlator_t *this, gf_changelog_journal_t *jnl)
ret = recursive_rmdir(jnl->jnl_processing_dir);
if (ret) {
gf_smsg(this->name, GF_LOG_ERROR, errno,
- CHANGELOG_LIB_MSG_FAILED_TO_RMDIR, "Failed to rmdir", "path=%s",
+ CHANGELOG_LIB_MSG_FAILED_TO_RMDIR, "path=%s",
jnl->jnl_processing_dir, NULL);
goto out;
}
diff --git a/xlators/features/changelog/lib/src/gf-changelog-reborp.c b/xlators/features/changelog/lib/src/gf-changelog-reborp.c
index 132e8511be4..56b11cbb705 100644
--- a/xlators/features/changelog/lib/src/gf-changelog-reborp.c
+++ b/xlators/features/changelog/lib/src/gf-changelog-reborp.c
@@ -15,14 +15,14 @@
#include "changelog-rpc-common.h"
#include "changelog-lib-messages.h"
-#include "syscall.h"
+#include <glusterfs/syscall.h>
/**
* Reverse socket: actual data transfer handler. Connection
* initiator is PROBER, data transfer is REBORP.
*/
-struct rpcsvc_program *gf_changelog_reborp_programs[];
+static struct rpcsvc_program *gf_changelog_reborp_programs[];
void *
gf_changelog_connection_janitor(void *arg)
@@ -55,9 +55,8 @@ gf_changelog_connection_janitor(void *arg)
ev = &entry->event;
gf_smsg(this->name, GF_LOG_INFO, 0,
- CHANGELOG_LIB_MSG_CLEANING_BRICK_ENTRY_INFO,
- "Cleaning brick entry for brick", "brick=%s", entry->brick,
- NULL);
+ CHANGELOG_LIB_MSG_CLEANING_BRICK_ENTRY_INFO, "brick=%s",
+ entry->brick, NULL);
/* 0x0: disable rpc-clnt */
rpc_clnt_disable(RPC_PROBER(entry));
@@ -71,21 +70,19 @@ gf_changelog_connection_janitor(void *arg)
while (!list_empty(&ev->events)) {
event = list_first_entry(&ev->events, struct gf_event, list);
gf_smsg(this->name, GF_LOG_INFO, 0,
- CHANGELOG_LIB_MSG_DRAINING_EVENT_INFO, "Draining event",
- "seq=%lu", event->seq, "payload=%d", event->count, NULL);
+ CHANGELOG_LIB_MSG_DRAINING_EVENT_INFO, "seq=%lu",
+ event->seq, "payload=%d", event->count, NULL);
GF_FREE(event);
drained++;
}
gf_smsg(this->name, GF_LOG_INFO, 0,
- CHANGELOG_LIB_MSG_DRAINING_EVENT_INFO, "Drained events",
- "num=%lu", drained, NULL);
+ CHANGELOG_LIB_MSG_DRAINED_EVENT_INFO, "num=%lu", drained, NULL);
/* 0x3: freeup brick entry */
gf_smsg(this->name, GF_LOG_INFO, 0,
- CHANGELOG_LIB_MSG_FREEING_ENTRY_INFO, "freeing entry",
- "entry=%p", entry, NULL);
+ CHANGELOG_LIB_MSG_FREEING_ENTRY_INFO, "entry=%p", entry, NULL);
LOCK_DESTROY(&entry->statelock);
GF_FREE(entry);
}
@@ -112,9 +109,7 @@ gf_changelog_reborp_rpcsvc_notify(rpcsvc_t *rpc, void *mydata,
ret = sys_unlink(RPC_SOCK(entry));
if (ret != 0)
gf_smsg(this->name, GF_LOG_WARNING, errno,
- CHANGELOG_LIB_MSG_UNLINK_FAILED,
- "failed to unlink "
- "reverse socket",
+ CHANGELOG_LIB_MSG_UNLINK_FAILED, "name=reverse socket",
"path=%s", RPC_SOCK(entry), NULL);
if (entry->connected)
GF_CHANGELOG_INVOKE_CBK(this, entry->connected, entry->brick,
@@ -391,11 +386,10 @@ gf_changelog_reborp_handle_event(rpcsvc_request_t *req)
return gf_changelog_event_handler(req, this, entry);
}
-rpcsvc_actor_t gf_changelog_reborp_actors[CHANGELOG_REV_PROC_MAX] = {
+static rpcsvc_actor_t gf_changelog_reborp_actors[CHANGELOG_REV_PROC_MAX] = {
[CHANGELOG_REV_PROC_EVENT] = {"CHANGELOG EVENT HANDLER",
- CHANGELOG_REV_PROC_EVENT,
- gf_changelog_reborp_handle_event, NULL, 0,
- DRC_NA},
+ gf_changelog_reborp_handle_event, NULL,
+ CHANGELOG_REV_PROC_EVENT, DRC_NA, 0},
};
/**
@@ -404,7 +398,7 @@ rpcsvc_actor_t gf_changelog_reborp_actors[CHANGELOG_REV_PROC_MAX] = {
* and that's required to invoke the callback with the appropriate
* brick path and it's private data.
*/
-struct rpcsvc_program gf_changelog_reborp_prog = {
+static struct rpcsvc_program gf_changelog_reborp_prog = {
.progname = "LIBGFCHANGELOG REBORP",
.prognum = CHANGELOG_REV_RPC_PROCNUM,
.progver = CHANGELOG_REV_RPC_PROCVER,
@@ -413,7 +407,7 @@ struct rpcsvc_program gf_changelog_reborp_prog = {
.synctask = _gf_false,
};
-struct rpcsvc_program *gf_changelog_reborp_programs[] = {
+static struct rpcsvc_program *gf_changelog_reborp_programs[] = {
&gf_changelog_reborp_prog,
NULL,
};
diff --git a/xlators/features/changelog/lib/src/gf-changelog-rpc.h b/xlators/features/changelog/lib/src/gf-changelog-rpc.h
index 975307b99d3..5c82d6f1c08 100644
--- a/xlators/features/changelog/lib/src/gf-changelog-rpc.h
+++ b/xlators/features/changelog/lib/src/gf-changelog-rpc.h
@@ -11,7 +11,7 @@
#ifndef __GF_CHANGELOG_RPC_H
#define __GF_CHANGELOG_RPC_H
-#include "xlator.h"
+#include <glusterfs/xlator.h>
#include "gf-changelog-helpers.h"
#include "changelog-rpc-common.h"
diff --git a/xlators/features/changelog/lib/src/gf-changelog.c b/xlators/features/changelog/lib/src/gf-changelog.c
index 42e20915922..57c3d39ef76 100644
--- a/xlators/features/changelog/lib/src/gf-changelog.c
+++ b/xlators/features/changelog/lib/src/gf-changelog.c
@@ -22,11 +22,11 @@
#endif
#include <string.h>
-#include "globals.h"
-#include "glusterfs.h"
-#include "logging.h"
-#include "defaults.h"
-#include "syncop.h"
+#include <glusterfs/globals.h>
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/logging.h>
+#include <glusterfs/defaults.h>
+#include <glusterfs/syncop.h>
#include "gf-changelog-rpc.h"
#include "gf-changelog-helpers.h"
@@ -102,8 +102,8 @@ gf_changelog_ctx_defaults_init(glusterfs_ctx_t *ctx)
if (!ctx->iobuf_pool)
goto free_pool;
- ctx->event_pool = event_pool_new(GF_CHANGELOG_EVENT_POOL_SIZE,
- GF_CHANGELOG_EVENT_THREAD_COUNT);
+ ctx->event_pool = gf_event_pool_new(GF_CHANGELOG_EVENT_POOL_SIZE,
+ GF_CHANGELOG_EVENT_THREAD_COUNT);
if (!ctx->event_pool)
goto free_pool;
@@ -237,9 +237,8 @@ gf_changelog_init_master()
{
int ret = 0;
- mem_pools_init_early();
ret = gf_changelog_init_context();
- mem_pools_init_late();
+ mem_pools_init();
return ret;
}
@@ -574,9 +573,8 @@ gf_changelog_register_generic(struct gf_brick_spec *bricks, int count,
brick = bricks;
while (count--) {
gf_smsg(this->name, GF_LOG_INFO, 0,
- CHANGELOG_LIB_MSG_NOTIFY_REGISTER_INFO, "Registering brick",
- "brick=%s", brick->brick_path, "notify_filter=%d",
- brick->filter, NULL);
+ CHANGELOG_LIB_MSG_NOTIFY_REGISTER_INFO, "brick=%s",
+ brick->brick_path, "notify_filter=%d", brick->filter, NULL);
ret = gf_changelog_register_brick(this, brick, need_order, xl);
if (ret != 0) {
diff --git a/xlators/features/changelog/lib/src/gf-history-changelog.c b/xlators/features/changelog/lib/src/gf-history-changelog.c
index c8a31ebbd73..a16219f3664 100644
--- a/xlators/features/changelog/lib/src/gf-history-changelog.c
+++ b/xlators/features/changelog/lib/src/gf-history-changelog.c
@@ -8,10 +8,10 @@
#endif
#include <string.h>
-#include "globals.h"
-#include "glusterfs.h"
-#include "logging.h"
-#include "syscall.h"
+#include <glusterfs/globals.h>
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/logging.h>
+#include <glusterfs/syscall.h>
#include "gf-changelog-helpers.h"
#include "gf-changelog-journal.h"
@@ -79,8 +79,8 @@ gf_history_changelog_done(char *file)
ret = sys_rename(buffer, to_path);
if (ret) {
gf_smsg(this->name, GF_LOG_ERROR, errno,
- CHANGELOG_LIB_MSG_RENAME_FAILED, "cannot move changelog file",
- "from=%s", file, "to=%s", to_path, NULL);
+ CHANGELOG_LIB_MSG_RENAME_FAILED, "from=%s", file, "to=%s",
+ to_path, NULL);
goto out;
}
@@ -522,8 +522,7 @@ gf_changelog_consume_wrap(void *data)
_gf_true);
if (ret) {
gf_smsg(this->name, GF_LOG_ERROR, 0, CHANGELOG_LIB_MSG_PARSE_ERROR,
- "could not parse changelog", "name=%s", ccd->changelog,
- NULL);
+ "name=%s", ccd->changelog, NULL);
goto out;
}
}
@@ -564,9 +563,6 @@ gf_history_consume(void *data)
{0},
};
gf_changelog_consume_data_t *curr = NULL;
- char thread_name[GF_THREAD_NAMEMAX] = {
- 0,
- };
hist_data = (gf_changelog_history_data_t *)data;
if (hist_data == NULL) {
@@ -612,12 +608,10 @@ gf_history_consume(void *data)
curr->retval = 0;
memset(curr->changelog, '\0', PATH_MAX);
- snprintf(thread_name, sizeof(thread_name), "clogc%03hx",
- ((iter + 1) & 0x3ff));
ret = gf_thread_create(&th_id[iter], NULL,
gf_changelog_consume_wrap, curr,
- thread_name);
+ "clogc%03hx", (iter + 1) & 0x3ff);
if (ret) {
gf_msg(this->name, GF_LOG_ERROR, ret,
CHANGELOG_LIB_MSG_THREAD_CREATION_FAILED,
@@ -647,9 +641,8 @@ gf_history_consume(void *data)
curr = &ccd[iter];
if (ccd->retval) {
publish = _gf_false;
- gf_msg(this->name, GF_LOG_ERROR, 0,
- CHANGELOG_LIB_MSG_PARSE_ERROR,
- "parsing error, ceased publishing...");
+ gf_smsg(this->name, GF_LOG_ERROR, 0,
+ CHANGELOG_LIB_MSG_PARSE_ERROR_CEASED, NULL);
continue;
}
@@ -728,7 +721,7 @@ gf_changelog_extract_min_max(const char *dname, const char *htime_dir, int *fd,
if (ret) {
ret = -1;
gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_LIB_MSG_HTIME_ERROR,
- "stat() failed on htime file", "path=%s", htime_file, NULL);
+ "op=stat", "path=%s", htime_file, NULL);
goto out;
}
@@ -742,7 +735,7 @@ gf_changelog_extract_min_max(const char *dname, const char *htime_dir, int *fd,
if (*fd < 0) {
ret = -1;
gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_LIB_MSG_HTIME_ERROR,
- "open() failed for htime file", "path=%s", htime_file, NULL);
+ "op=open", "path=%s", htime_file, NULL);
goto out;
}
@@ -751,17 +744,15 @@ gf_changelog_extract_min_max(const char *dname, const char *htime_dir, int *fd,
if (ret < 0) {
ret = -1;
gf_smsg(this->name, GF_LOG_ERROR, errno,
- CHANGELOG_LIB_MSG_GET_XATTR_FAILED,
- "error extracting max timstamp from htime file"
- "path=%s",
- htime_file, NULL);
+ CHANGELOG_LIB_MSG_GET_XATTR_FAILED, "path=%s", htime_file,
+ NULL);
goto out;
}
sscanf(x_value, "%lu:%lu", max_ts, total);
- gf_smsg(this->name, GF_LOG_INFO, 0, CHANGELOG_LIB_MSG_TOTAL_LOG_INFO,
- "changelogs min max", "min=%lu", *min_ts, "max=%lu", *max_ts,
- "total_changelogs=%lu", *total, NULL);
+ gf_smsg(this->name, GF_LOG_INFO, 0, CHANGELOG_LIB_MSG_MIN_MAX_INFO,
+ "min=%lu", *min_ts, "max=%lu", *max_ts, "total_changelogs=%lu",
+ *total, NULL);
ret = 0;
@@ -842,15 +833,14 @@ gf_history_changelog(char *changelog_dir, unsigned long start,
goto out;
}
- gf_smsg(this->name, GF_LOG_INFO, 0, CHANGELOG_LIB_MSG_TOTAL_LOG_INFO,
- "Requesting historical changelogs", "start=%lu", start, "end=%lu",
- end, NULL);
+ gf_smsg(this->name, GF_LOG_INFO, 0, CHANGELOG_LIB_MSG_REQUESTING_INFO,
+ "start=%lu", start, "end=%lu", end, NULL);
/* basic sanity check */
if (start > end || n_parallel <= 0) {
gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_LIB_MSG_HIST_FAILED,
- "Sanity check failed", "start=%lu", start, "end=%lu", end,
- "thread_count=%d", n_parallel, NULL);
+ "start=%lu", start, "end=%lu", end, "thread_count=%d",
+ n_parallel, NULL);
ret = -1;
goto out;
}
@@ -864,7 +854,7 @@ gf_history_changelog(char *changelog_dir, unsigned long start,
dirp = sys_opendir(htime_dir);
if (dirp == NULL) {
gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_LIB_MSG_HTIME_ERROR,
- "open dir on htime failed", "path=%s", htime_dir, NULL);
+ "op=opendir", "path=%s", htime_dir, NULL);
ret = -1;
goto out;
}
@@ -876,9 +866,8 @@ gf_history_changelog(char *changelog_dir, unsigned long start,
if (!entry || errno != 0) {
gf_smsg(this->name, GF_LOG_ERROR, errno,
- CHANGELOG_LIB_MSG_HIST_FAILED,
- "Requested changelog range is not availbale", "start=%lu",
- start, "end=%lu", end, NULL);
+ CHANGELOG_LIB_MSG_HIST_FAILED, "start=%lu", start,
+ "end=%lu", end, NULL);
ret = -2;
break;
}
@@ -916,9 +905,8 @@ gf_history_changelog(char *changelog_dir, unsigned long start,
if (gf_history_check(fd, from, start, len) != 0) {
ret = -1;
gf_smsg(this->name, GF_LOG_ERROR, 0,
- CHANGELOG_LIB_MSG_GET_TIME_ERROR,
- "wrong result for start", "start=%lu", start, "idx=%lu",
- from, NULL);
+ CHANGELOG_LIB_MSG_GET_TIME_ERROR, "for=start",
+ "start=%lu", start, "idx=%lu", from, NULL);
goto out;
}
@@ -949,9 +937,8 @@ gf_history_changelog(char *changelog_dir, unsigned long start,
if (gf_history_check(fd, to, end2, len) != 0) {
ret = -1;
gf_smsg(this->name, GF_LOG_ERROR, 0,
- CHANGELOG_LIB_MSG_GET_TIME_ERROR,
- "wrong result for end", "start=%lu", end2, "idx=%lu",
- to, NULL);
+ CHANGELOG_LIB_MSG_GET_TIME_ERROR, "for=end",
+ "start=%lu", end2, "idx=%lu", to, NULL);
goto out;
}
@@ -963,9 +950,9 @@ gf_history_changelog(char *changelog_dir, unsigned long start,
if (ret == -1)
goto out;
- gf_smsg(this->name, GF_LOG_INFO, 0,
- CHANGELOG_LIB_MSG_TOTAL_LOG_INFO, "FINAL", "from=%lu", ts1,
- "to=%lu", ts2, "changes=%lu", (to - from + 1), NULL);
+ gf_smsg(this->name, GF_LOG_INFO, 0, CHANGELOG_LIB_MSG_FINAL_INFO,
+ "from=%lu", ts1, "to=%lu", ts2, "changes=%lu",
+ (to - from + 1), NULL);
hist_data = GF_CALLOC(1, sizeof(gf_changelog_history_data_t),
gf_changelog_mt_history_data_t);
@@ -1003,11 +990,9 @@ gf_history_changelog(char *changelog_dir, unsigned long start,
} else { /* end of range check */
gf_smsg(this->name, GF_LOG_ERROR, errno,
- CHANGELOG_LIB_MSG_HIST_FAILED,
- "Requested changelog range is not "
- "available. Retrying next HTIME",
- "start=%lu", start, "end=%lu", end, "chlog_min=%lu", min_ts,
- "chlog_max=%lu", max_ts, NULL);
+ CHANGELOG_LIB_MSG_HIST_FAILED, "start=%lu", start,
+ "end=%lu", end, "chlog_min=%lu", min_ts, "chlog_max=%lu",
+ max_ts, NULL);
}
} /* end of readdir() */
diff --git a/xlators/features/changelog/src/changelog-barrier.c b/xlators/features/changelog/src/changelog-barrier.c
index e8d742404df..0fb89ddb127 100644
--- a/xlators/features/changelog/src/changelog-barrier.c
+++ b/xlators/features/changelog/src/changelog-barrier.c
@@ -10,7 +10,7 @@
#include "changelog-helpers.h"
#include "changelog-messages.h"
-#include "call-stub.h"
+#include <glusterfs/call-stub.h>
/* Enqueue a stub*/
void
@@ -53,14 +53,14 @@ chlog_barrier_dequeue_all(xlator_t *this, struct list_head *queue)
{
call_stub_t *stub = NULL;
- gf_msg(this->name, GF_LOG_INFO, 0, CHANGELOG_MSG_BARRIER_INFO,
- "Dequeuing all the changelog barriered fops");
+ gf_smsg(this->name, GF_LOG_INFO, 0, CHANGELOG_MSG_DEQUEUING_BARRIER_FOPS,
+ NULL);
while ((stub = __chlog_barrier_dequeue(this, queue)))
call_resume(stub);
- gf_msg(this->name, GF_LOG_INFO, 0, CHANGELOG_MSG_BARRIER_INFO,
- "Dequeuing changelog barriered fops is finished");
+ gf_smsg(this->name, GF_LOG_INFO, 0,
+ CHANGELOG_MSG_DEQUEUING_BARRIER_FOPS_FINISHED, NULL);
return;
}
@@ -80,8 +80,7 @@ chlog_barrier_timeout(void *data)
INIT_LIST_HEAD(&queue);
- gf_msg(this->name, GF_LOG_ERROR, 0, CHANGELOG_MSG_BARRIER_ERROR,
- "Disabling changelog barrier because of the timeout.");
+ gf_smsg(this->name, GF_LOG_ERROR, 0, CHANGELOG_MSG_BARRIER_TIMEOUT, NULL);
LOCK(&priv->lock);
{
@@ -120,8 +119,8 @@ __chlog_barrier_enable(xlator_t *this, changelog_priv_t *priv)
priv->timer = gf_timer_call_after(this->ctx, priv->timeout,
chlog_barrier_timeout, (void *)this);
if (!priv->timer) {
- gf_msg(this->name, GF_LOG_CRITICAL, 0, CHANGELOG_MSG_BARRIER_ERROR,
- "Couldn't add changelog barrier timeout event.");
+ gf_smsg(this->name, GF_LOG_CRITICAL, 0,
+ CHANGELOG_MSG_TIMEOUT_ADD_FAILED, NULL);
goto out;
}
diff --git a/xlators/features/changelog/src/changelog-encoders.h b/xlators/features/changelog/src/changelog-encoders.h
index ca42c4c4fe0..26252696d56 100644
--- a/xlators/features/changelog/src/changelog-encoders.h
+++ b/xlators/features/changelog/src/changelog-encoders.h
@@ -11,8 +11,8 @@
#ifndef _CHANGELOG_ENCODERS_H
#define _CHANGELOG_ENCODERS_H
-#include "xlator.h"
-#include "defaults.h"
+#include <glusterfs/xlator.h>
+#include <glusterfs/defaults.h>
#include "changelog-helpers.h"
diff --git a/xlators/features/changelog/src/changelog-ev-handle.c b/xlators/features/changelog/src/changelog-ev-handle.c
index 3ed6ff821d9..aa94459de5a 100644
--- a/xlators/features/changelog/src/changelog-ev-handle.c
+++ b/xlators/features/changelog/src/changelog-ev-handle.c
@@ -134,6 +134,8 @@ changelog_rpc_notify(struct rpc_clnt *rpc, void *mydata, rpc_clnt_event_t event,
changelog_clnt_t *c_clnt = NULL;
changelog_priv_t *priv = NULL;
changelog_ev_selector_t *selection = NULL;
+ uint64_t clntcnt = 0;
+ uint64_t xprtcnt = 0;
crpc = mydata;
this = crpc->this;
@@ -144,6 +146,7 @@ changelog_rpc_notify(struct rpc_clnt *rpc, void *mydata, rpc_clnt_event_t event,
switch (event) {
case RPC_CLNT_CONNECT:
selection = &priv->ev_selection;
+ GF_ATOMIC_INC(priv->clntcnt);
LOCK(&c_clnt->wait_lock);
{
@@ -176,12 +179,23 @@ changelog_rpc_notify(struct rpc_clnt *rpc, void *mydata, rpc_clnt_event_t event,
changelog_set_disconnect_flag(crpc, _gf_true);
}
UNLOCK(&crpc->lock);
+ LOCK(&c_clnt->active_lock);
+ {
+ list_del_init(&crpc->list);
+ }
+ UNLOCK(&c_clnt->active_lock);
break;
case RPC_CLNT_MSG:
case RPC_CLNT_DESTROY:
/* Free up mydata */
changelog_rpc_clnt_unref(crpc);
+ clntcnt = GF_ATOMIC_DEC(priv->clntcnt);
+ xprtcnt = GF_ATOMIC_GET(priv->xprtcnt);
+ if (this->cleanup_starting) {
+ if (!clntcnt && !xprtcnt)
+ changelog_process_cleanup_event(this);
+ }
break;
case RPC_CLNT_PING:
break;
@@ -211,8 +225,8 @@ changelog_ev_connector(void *data)
changelog_rpc_notify);
if (!crpc->rpc) {
gf_smsg(this->name, GF_LOG_ERROR, 0,
- CHANGELOG_MSG_RPC_CONNECT_ERROR,
- "failed to connect back", "path=%s", crpc->sock, NULL);
+ CHANGELOG_MSG_RPC_CONNECT_ERROR, "path=%s", crpc->sock,
+ NULL);
crpc->cleanup(crpc);
goto mutex_unlock;
}
@@ -364,9 +378,8 @@ changelog_ev_dispatch(void *data)
ret = rbuf_wait_for_completion(c_clnt->rbuf, opaque, _dispatcher,
c_clnt);
if (ret)
- gf_msg(this->name, GF_LOG_WARNING, 0,
- CHANGELOG_MSG_PUT_BUFFER_FAILED,
- "failed to put buffer after consumption");
+ gf_smsg(this->name, GF_LOG_WARNING, 0,
+ CHANGELOG_MSG_PUT_BUFFER_FAILED, NULL);
}
return NULL;
diff --git a/xlators/features/changelog/src/changelog-ev-handle.h b/xlators/features/changelog/src/changelog-ev-handle.h
index 7e543a0edb3..cc1af58a276 100644
--- a/xlators/features/changelog/src/changelog-ev-handle.h
+++ b/xlators/features/changelog/src/changelog-ev-handle.h
@@ -11,11 +11,11 @@
#ifndef __CHANGELOG_EV_HANDLE_H
#define __CHANGELOG_EV_HANDLE_H
-#include "list.h"
-#include "xlator.h"
+#include <glusterfs/list.h>
+#include <glusterfs/xlator.h>
#include "rpc-clnt.h"
-#include "rot-buffs.h"
+#include <glusterfs/rot-buffs.h>
struct changelog_clnt;
@@ -131,4 +131,6 @@ changelog_ev_queue_connection(changelog_clnt_t *, changelog_rpc_clnt_t *);
void
changelog_ev_cleanup_connections(xlator_t *, changelog_clnt_t *);
+void
+changelog_process_cleanup_event(xlator_t *);
#endif
diff --git a/xlators/features/changelog/src/changelog-helpers.c b/xlators/features/changelog/src/changelog-helpers.c
index 9ff9115c40d..e561997d858 100644
--- a/xlators/features/changelog/src/changelog-helpers.c
+++ b/xlators/features/changelog/src/changelog-helpers.c
@@ -8,11 +8,11 @@
cases as published by the Free Software Foundation.
*/
-#include "xlator.h"
-#include "defaults.h"
-#include "logging.h"
-#include "iobuf.h"
-#include "syscall.h"
+#include <glusterfs/xlator.h>
+#include <glusterfs/defaults.h>
+#include <glusterfs/logging.h>
+#include <glusterfs/iobuf.h>
+#include <glusterfs/syscall.h>
#include "changelog-helpers.h"
#include "changelog-encoders.h"
@@ -22,6 +22,7 @@
#include "changelog-encoders.h"
#include "changelog-rpc-common.h"
#include <pthread.h>
+#include <time.h>
static void
changelog_cleanup_free_mutex(void *arg_mutex)
@@ -41,16 +42,15 @@ changelog_thread_cleanup(xlator_t *this, pthread_t thr_id)
/* send a cancel request to the thread */
ret = pthread_cancel(thr_id);
if (ret != 0) {
- gf_msg(this->name, GF_LOG_ERROR, errno,
- CHANGELOG_MSG_PTHREAD_CANCEL_FAILED, "could not cancel thread");
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ CHANGELOG_MSG_PTHREAD_CANCEL_FAILED, NULL);
goto out;
}
ret = pthread_join(thr_id, &retval);
if ((ret != 0) || (retval != PTHREAD_CANCELED)) {
- gf_msg(this->name, GF_LOG_ERROR, errno,
- CHANGELOG_MSG_PTHREAD_CANCEL_FAILED,
- "cancel request not adhered as expected");
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ CHANGELOG_MSG_PTHREAD_CANCEL_FAILED, NULL);
}
out:
@@ -153,27 +153,6 @@ changelog_init_event_selection(xlator_t *this,
return 0;
}
-int
-changelog_cleanup_event_selection(xlator_t *this,
- changelog_ev_selector_t *selection)
-{
- int j = CHANGELOG_EV_SELECTION_RANGE;
-
- LOCK(&selection->reflock);
- {
- while (j--) {
- if (selection->ref[j] > 0)
- gf_msg(this->name, GF_LOG_WARNING, 0,
- CHANGELOG_MSG_CLEANUP_ON_ACTIVE_REF,
- "changelog event selection cleaning up "
- " on active references");
- }
- }
- UNLOCK(&selection->reflock);
-
- return LOCK_DESTROY(&selection->reflock);
-}
-
static void
changelog_perform_dispatch(xlator_t *this, changelog_priv_t *priv, void *mem,
size_t size)
@@ -263,8 +242,7 @@ changelog_write(int fd, char *buffer, size_t len)
}
int
-htime_update(xlator_t *this, changelog_priv_t *priv, unsigned long ts,
- char *buffer)
+htime_update(xlator_t *this, changelog_priv_t *priv, time_t ts, char *buffer)
{
char changelog_path[PATH_MAX + 1] = {
0,
@@ -277,8 +255,8 @@ htime_update(xlator_t *this, changelog_priv_t *priv, unsigned long ts,
int ret = 0;
if (priv->htime_fd == -1) {
- gf_msg(this->name, GF_LOG_ERROR, 0, CHANGELOG_MSG_HTIME_ERROR,
- "Htime fd not available for updation");
+ gf_smsg(this->name, GF_LOG_ERROR, 0, CHANGELOG_MSG_HTIME_ERROR,
+ "reason=fd not available", NULL);
ret = -1;
goto out;
}
@@ -288,13 +266,13 @@ htime_update(xlator_t *this, changelog_priv_t *priv, unsigned long ts,
goto out;
}
if (changelog_write(priv->htime_fd, (void *)changelog_path, len + 1) < 0) {
- gf_msg(this->name, GF_LOG_ERROR, 0, CHANGELOG_MSG_HTIME_ERROR,
- "Htime file content write failed");
+ gf_smsg(this->name, GF_LOG_ERROR, 0, CHANGELOG_MSG_HTIME_ERROR,
+ "reason=write failed", NULL);
ret = -1;
goto out;
}
- len = snprintf(x_value, sizeof(x_value), "%lu:%d", ts,
+ len = snprintf(x_value, sizeof(x_value), "%ld:%d", ts,
priv->rollover_count);
if (len >= sizeof(x_value)) {
ret = -1;
@@ -303,12 +281,12 @@ htime_update(xlator_t *this, changelog_priv_t *priv, unsigned long ts,
if (sys_fsetxattr(priv->htime_fd, HTIME_KEY, x_value, len, XATTR_REPLACE)) {
gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_HTIME_ERROR,
- "Htime xattr updation failed with XATTR_REPLACE",
+ "reason=xattr updation failed", "XATTR_REPLACE=true",
"changelog=%s", changelog_path, NULL);
if (sys_fsetxattr(priv->htime_fd, HTIME_KEY, x_value, len, 0)) {
gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_HTIME_ERROR,
- "Htime xattr updation failed", "changelog=%s",
+ "reason=xattr updation failed", "changelog=%s",
changelog_path, NULL);
ret = -1;
goto out;
@@ -346,15 +324,15 @@ cl_is_empty(xlator_t *this, int fd)
ret = sys_fstat(fd, &stbuf);
if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_FSTAT_OP_FAILED,
- "Could not stat (CHANGELOG)");
+ gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_FSTAT_OP_FAILED,
+ NULL);
goto out;
}
ret = sys_lseek(fd, 0, SEEK_SET);
if (ret == -1) {
- gf_msg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_LSEEK_OP_FAILED,
- "Could not lseek (CHANGELOG)");
+ gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_LSEEK_OP_FAILED,
+ NULL);
goto out;
}
@@ -390,8 +368,8 @@ update_path(xlator_t *this, char *cl_path)
found = strstr(cl_path, up_cl);
if (found == NULL) {
- gf_msg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_LSEEK_OP_FAILED,
- "Could not find CHANGELOG in changelog path");
+ gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_PATH_NOT_FOUND,
+ NULL);
goto out;
} else {
memcpy(found, low_cl, sizeof(low_cl) - 1);
@@ -403,18 +381,22 @@ out:
}
static int
-changelog_rollover_changelog(xlator_t *this, changelog_priv_t *priv,
- unsigned long ts)
+changelog_rollover_changelog(xlator_t *this, changelog_priv_t *priv, time_t ts)
{
int ret = -1;
int notify = 0;
int cl_empty_flag = 0;
+ struct tm *gmt;
+ char yyyymmdd[40];
char ofile[PATH_MAX] = {
0,
};
char nfile[PATH_MAX] = {
0,
};
+ char nfile_dir[PATH_MAX] = {
+ 0,
+ };
changelog_event_t ev = {
0,
};
@@ -422,33 +404,37 @@ changelog_rollover_changelog(xlator_t *this, changelog_priv_t *priv,
if (priv->changelog_fd != -1) {
ret = sys_fsync(priv->changelog_fd);
if (ret < 0) {
- gf_msg(this->name, GF_LOG_ERROR, errno,
- CHANGELOG_MSG_FSYNC_OP_FAILED, "fsync failed");
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ CHANGELOG_MSG_FSYNC_OP_FAILED, NULL);
}
ret = cl_is_empty(this, priv->changelog_fd);
if (ret == 1) {
cl_empty_flag = 1;
} else if (ret == -1) {
/* Log error but proceed as usual */
- gf_msg(this->name, GF_LOG_WARNING, 0,
- CHANGELOG_MSG_DETECT_EMPTY_CHANGELOG_FAILED,
- "Error detecting empty changelog");
+ gf_smsg(this->name, GF_LOG_WARNING, 0,
+ CHANGELOG_MSG_DETECT_EMPTY_CHANGELOG_FAILED, NULL);
}
sys_close(priv->changelog_fd);
priv->changelog_fd = -1;
}
+ /* Get GMT time. */
+ gmt = gmtime(&ts);
+
+ strftime(yyyymmdd, sizeof(yyyymmdd), "%Y/%m/%d", gmt);
+
(void)snprintf(ofile, PATH_MAX, "%s/" CHANGELOG_FILE_NAME,
priv->changelog_dir);
- (void)snprintf(nfile, PATH_MAX, "%s/" CHANGELOG_FILE_NAME ".%lu",
- priv->changelog_dir, ts);
+ (void)snprintf(nfile, PATH_MAX, "%s/%s/" CHANGELOG_FILE_NAME ".%ld",
+ priv->changelog_dir, yyyymmdd, ts);
+ (void)snprintf(nfile_dir, PATH_MAX, "%s/%s", priv->changelog_dir, yyyymmdd);
if (cl_empty_flag == 1) {
ret = sys_unlink(ofile);
if (ret) {
gf_smsg(this->name, GF_LOG_ERROR, errno,
- CHANGELOG_MSG_UNLINK_OP_FAILED,
- "error unlinking empty changelog", "path=%s", ofile, NULL);
+ CHANGELOG_MSG_UNLINK_OP_FAILED, "path=%s", ofile, NULL);
ret = 0; /* Error in unlinking empty changelog should
not break further changelog operation, so
reset return value to 0*/
@@ -456,13 +442,26 @@ changelog_rollover_changelog(xlator_t *this, changelog_priv_t *priv,
} else {
ret = sys_rename(ofile, nfile);
+ /* Changelog file rename gets ENOENT when parent dir doesn't exist */
+ if (errno == ENOENT) {
+ ret = mkdir_p(nfile_dir, 0600, _gf_true);
+
+ if ((ret == -1) && (EEXIST != errno)) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ CHANGELOG_MSG_MKDIR_ERROR, "%s", nfile_dir, NULL);
+ goto out;
+ }
+
+ ret = sys_rename(ofile, nfile);
+ }
+
if (ret && (errno == ENOENT)) {
ret = 0;
goto out;
}
if (ret) {
gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_RENAME_ERROR,
- "error renaming", "from=%s", ofile, "to=%s", nfile, NULL);
+ "from=%s", ofile, "to=%s", nfile, NULL);
}
}
@@ -476,8 +475,8 @@ changelog_rollover_changelog(xlator_t *this, changelog_priv_t *priv,
}
ret = htime_update(this, priv, ts, nfile);
if (ret == -1) {
- gf_msg(this->name, GF_LOG_ERROR, 0, CHANGELOG_MSG_HTIME_ERROR,
- "could not update htime file");
+ gf_smsg(this->name, GF_LOG_ERROR, 0, CHANGELOG_MSG_HTIME_ERROR,
+ NULL);
goto out;
}
}
@@ -501,15 +500,10 @@ out:
{
if (ret) {
priv->bn.bnotify_error = _gf_true;
- gf_msg(this->name, GF_LOG_ERROR, 0,
- CHANGELOG_MSG_EXPLICIT_ROLLOVER_FAILED,
- "Fail snapshot because of "
- "previous errors");
+ gf_smsg(this->name, GF_LOG_ERROR, 0,
+ CHANGELOG_MSG_EXPLICIT_ROLLOVER_FAILED, NULL);
} else {
gf_smsg(this->name, GF_LOG_INFO, 0, CHANGELOG_MSG_BNOTIFY_INFO,
- "Explicit "
- "rollover changelog signaling "
- "bnotify",
"changelog=%s", nfile, NULL);
}
priv->bn.bnotify = _gf_false;
@@ -556,8 +550,8 @@ find_current_htime(int ht_dir_fd, const char *ht_dir_path, char *ht_file_bname)
cnt = scandir(ht_dir_path, &namelist, filter_cur_par_dirs, alphasort);
if (cnt < 0) {
- gf_msg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_SCAN_DIR_FAILED,
- "scandir failed");
+ gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_SCAN_DIR_FAILED,
+ NULL);
} else if (cnt > 0) {
if (snprintf(ht_file_bname, NAME_MAX, "%s",
namelist[cnt - 1]->d_name) >= NAME_MAX) {
@@ -566,16 +560,15 @@ find_current_htime(int ht_dir_fd, const char *ht_dir_path, char *ht_file_bname)
}
if (sys_fsetxattr(ht_dir_fd, HTIME_CURRENT, ht_file_bname,
strlen(ht_file_bname), 0)) {
- gf_msg(this->name, GF_LOG_ERROR, errno,
- CHANGELOG_MSG_FSETXATTR_FAILED,
- "fsetxattr failed: HTIME_CURRENT");
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ CHANGELOG_MSG_FSETXATTR_FAILED, "HTIME_CURRENT", NULL);
ret = -1;
goto out;
}
if (sys_fsync(ht_dir_fd) < 0) {
- gf_msg(this->name, GF_LOG_ERROR, errno,
- CHANGELOG_MSG_FSYNC_OP_FAILED, "fsync failed");
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ CHANGELOG_MSG_FSYNC_OP_FAILED, NULL);
ret = -1;
goto out;
}
@@ -596,7 +589,7 @@ out:
* returns -1 on failure or error
*/
int
-htime_open(xlator_t *this, changelog_priv_t *priv, unsigned long ts)
+htime_open(xlator_t *this, changelog_priv_t *priv, time_t ts)
{
int ht_file_fd = -1;
int ht_dir_fd = -1;
@@ -632,7 +625,7 @@ htime_open(xlator_t *this, changelog_priv_t *priv, unsigned long ts)
ht_dir_fd = open(ht_dir_path, O_RDONLY);
if (ht_dir_fd == -1) {
gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_OPEN_FAILED,
- "open failed", "path=%s", ht_dir_path, NULL);
+ "path=%s", ht_dir_path, NULL);
ret = -1;
goto out;
}
@@ -640,9 +633,8 @@ htime_open(xlator_t *this, changelog_priv_t *priv, unsigned long ts)
size = sys_fgetxattr(ht_dir_fd, HTIME_CURRENT, ht_file_bname,
sizeof(ht_file_bname));
if (size < 0) {
- gf_msg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_FGETXATTR_FAILED,
- "Error extracting"
- " HTIME_CURRENT.");
+ gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_FGETXATTR_FAILED,
+ "name=HTIME_CURRENT", NULL);
/* If upgrade scenario, find the latest HTIME.TSTAMP file
* and use the same. If error, create a new HTIME.TSTAMP
@@ -650,20 +642,18 @@ htime_open(xlator_t *this, changelog_priv_t *priv, unsigned long ts)
*/
cnt = find_current_htime(ht_dir_fd, ht_dir_path, ht_file_bname);
if (cnt <= 0) {
- gf_msg(this->name, GF_LOG_INFO, errno, CHANGELOG_MSG_HTIME_INFO,
- "HTIME_CURRENT not found. Changelog enabled"
- " before init");
+ gf_smsg(this->name, GF_LOG_INFO, errno,
+ CHANGELOG_MSG_NO_HTIME_CURRENT, NULL);
sys_close(ht_dir_fd);
return htime_create(this, priv, ts);
}
- gf_msg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_HTIME_ERROR,
- "Error extracting"
- " HTIME_CURRENT.");
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ CHANGELOG_MSG_HTIME_CURRENT_ERROR, NULL);
}
- gf_smsg(this->name, GF_LOG_INFO, 0, CHANGELOG_MSG_HTIME_INFO,
- "HTIME_CURRENT", "path=%s", ht_file_bname, NULL);
+ gf_smsg(this->name, GF_LOG_INFO, 0, CHANGELOG_MSG_HTIME_CURRENT, "path=%s",
+ ht_file_bname, NULL);
len = snprintf(ht_file_path, PATH_MAX, "%s/%s", ht_dir_path, ht_file_bname);
if ((len < 0) || (len >= PATH_MAX)) {
ret = -1;
@@ -676,7 +666,7 @@ htime_open(xlator_t *this, changelog_priv_t *priv, unsigned long ts)
S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH);
if (ht_file_fd < 0) {
gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_OPEN_FAILED,
- "unable to open htime file", "path=%s", ht_file_path, NULL);
+ "path=%s", ht_file_path, NULL);
ret = -1;
goto out;
}
@@ -686,8 +676,8 @@ htime_open(xlator_t *this, changelog_priv_t *priv, unsigned long ts)
ret = sys_fstat(ht_file_fd, &stat_buf);
if (ret < 0) {
- gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_HTIME_ERROR,
- "unable to stat htime file", "path=%s", ht_file_path, NULL);
+ gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_HTIME_STAT_ERROR,
+ "path=%s", ht_file_path, NULL);
ret = -1;
goto out;
}
@@ -696,9 +686,7 @@ htime_open(xlator_t *this, changelog_priv_t *priv, unsigned long ts)
size = sys_fgetxattr(ht_file_fd, HTIME_KEY, x_value, sizeof(x_value));
if (size < 0) {
gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_FGETXATTR_FAILED,
- "error extracting max"
- " timstamp from htime file",
- "path=%s", ht_file_path, NULL);
+ "name=%s", HTIME_KEY, "path=%s", ht_file_path, NULL);
ret = -1;
goto out;
}
@@ -710,14 +698,11 @@ htime_open(xlator_t *this, changelog_priv_t *priv, unsigned long ts)
total1 = stat_buf.st_size / record_len;
if (total != total1) {
gf_smsg(this->name, GF_LOG_INFO, 0, CHANGELOG_MSG_TOTAL_LOG_INFO,
- "Mismatch of changelog count. "
- "INIT CASE",
"xattr_total=%lu", total, "size_total=%lu", total1, NULL);
}
- gf_smsg(this->name, GF_LOG_INFO, 0, CHANGELOG_MSG_TOTAL_LOG_INFO,
- "INIT CASE", "min=%lu", min_ts, "max=%lu", max_ts,
- "total_changelogs=%lu", total, NULL);
+ gf_smsg(this->name, GF_LOG_INFO, 0, CHANGELOG_MSG_TOTAL_LOG_INFO, "min=%lu",
+ min_ts, "max=%lu", max_ts, "total_changelogs=%lu", total, NULL);
if (total < total1)
priv->rollover_count = total1 + 1;
@@ -734,7 +719,7 @@ out:
* returns -1 on failure or error
*/
int
-htime_create(xlator_t *this, changelog_priv_t *priv, unsigned long ts)
+htime_create(xlator_t *this, changelog_priv_t *priv, time_t ts)
{
int ht_file_fd = -1;
int ht_dir_fd = -1;
@@ -751,15 +736,13 @@ htime_create(xlator_t *this, changelog_priv_t *priv, unsigned long ts)
int flags = 0;
int32_t len = 0;
- gf_smsg(this->name, GF_LOG_INFO, 0, CHANGELOG_MSG_HTIME_INFO,
- "Changelog enable: Creating new "
- "HTIME file",
- "name=%lu", ts, NULL);
+ gf_smsg(this->name, GF_LOG_INFO, 0, CHANGELOG_MSG_NEW_HTIME_FILE,
+ "name=%ld", ts, NULL);
CHANGELOG_FILL_HTIME_DIR(priv->changelog_dir, ht_dir_path);
/* get the htime file name in ht_file_path */
- len = snprintf(ht_file_path, PATH_MAX, "%s/%s.%lu", ht_dir_path,
+ len = snprintf(ht_file_path, PATH_MAX, "%s/%s.%ld", ht_dir_path,
HTIME_FILE_NAME, ts);
if ((len < 0) || (len >= PATH_MAX)) {
ret = -1;
@@ -771,23 +754,23 @@ htime_create(xlator_t *this, changelog_priv_t *priv, unsigned long ts)
S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH);
if (ht_file_fd < 0) {
gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_OPEN_FAILED,
- "unable to create htime file", "path=%s", ht_file_path, NULL);
+ "path=%s", ht_file_path, NULL);
ret = -1;
goto out;
}
if (sys_fsetxattr(ht_file_fd, HTIME_KEY, HTIME_INITIAL_VALUE,
sizeof(HTIME_INITIAL_VALUE) - 1, 0)) {
- gf_msg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_FSETXATTR_FAILED,
- "Htime xattr initialization failed");
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ CHANGELOG_MSG_XATTR_INIT_FAILED, NULL);
ret = -1;
goto out;
}
ret = sys_fsync(ht_file_fd);
if (ret < 0) {
- gf_msg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_FSYNC_OP_FAILED,
- "fsync failed");
+ gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_FSYNC_OP_FAILED,
+ NULL);
goto out;
}
@@ -800,26 +783,25 @@ htime_create(xlator_t *this, changelog_priv_t *priv, unsigned long ts)
ht_dir_fd = open(ht_dir_path, O_RDONLY);
if (ht_dir_fd == -1) {
gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_OPEN_FAILED,
- "open failed", "path=%s", ht_dir_path, NULL);
+ "path=%s", ht_dir_path, NULL);
ret = -1;
goto out;
}
- (void)snprintf(ht_file_bname, sizeof(ht_file_bname), "%s.%lu",
+ (void)snprintf(ht_file_bname, sizeof(ht_file_bname), "%s.%ld",
HTIME_FILE_NAME, ts);
if (sys_fsetxattr(ht_dir_fd, HTIME_CURRENT, ht_file_bname,
strlen(ht_file_bname), 0)) {
- gf_msg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_FSETXATTR_FAILED,
- "fsetxattr failed:"
- " HTIME_CURRENT");
+ gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_FSETXATTR_FAILED,
+ " HTIME_CURRENT", NULL);
ret = -1;
goto out;
}
ret = sys_fsync(ht_dir_fd);
if (ret < 0) {
- gf_msg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_FSYNC_OP_FAILED,
- "fsync failed");
+ gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_FSYNC_OP_FAILED,
+ NULL);
goto out;
}
@@ -873,7 +855,7 @@ changelog_snap_open(xlator_t *this, changelog_priv_t *priv)
fd = open(c_snap_path, flags, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH);
if (fd < 0) {
gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_OPEN_FAILED,
- "unable to open file", "path=%s", c_snap_path, NULL);
+ "path=%s", c_snap_path, NULL);
ret = -1;
goto out;
}
@@ -905,8 +887,8 @@ changelog_snap_logging_start(xlator_t *this, changelog_priv_t *priv)
int ret = 0;
ret = changelog_snap_open(this, priv);
- gf_msg(this->name, GF_LOG_INFO, 0, CHANGELOG_MSG_SNAP_INFO,
- "Now starting to log in call path");
+ gf_smsg(this->name, GF_LOG_INFO, 0, CHANGELOG_MSG_SNAP_INFO, "starting",
+ NULL);
return ret;
}
@@ -926,8 +908,8 @@ changelog_snap_logging_stop(xlator_t *this, changelog_priv_t *priv)
sys_close(priv->c_snap_fd);
priv->c_snap_fd = -1;
- gf_msg(this->name, GF_LOG_INFO, 0, CHANGELOG_MSG_SNAP_INFO,
- "Stopped to log in call path");
+ gf_smsg(this->name, GF_LOG_INFO, 0, CHANGELOG_MSG_SNAP_INFO, "Stopped",
+ NULL);
return ret;
}
@@ -955,9 +937,6 @@ changelog_open_journal(xlator_t *this, changelog_priv_t *priv)
fd = open(changelog_path, flags, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH);
if (fd < 0) {
gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_OPEN_FAILED,
- "unable to open/create changelog file."
- " change-logging will be"
- " inactive",
"path=%s", changelog_path, NULL);
goto out;
}
@@ -980,8 +959,8 @@ out:
}
int
-changelog_start_next_change(xlator_t *this, changelog_priv_t *priv,
- unsigned long ts, gf_boolean_t finale)
+changelog_start_next_change(xlator_t *this, changelog_priv_t *priv, time_t ts,
+ gf_boolean_t finale)
{
int ret = -1;
@@ -1002,21 +981,12 @@ changelog_entry_length()
return sizeof(changelog_log_data_t);
}
-int
+void
changelog_fill_rollover_data(changelog_log_data_t *cld, gf_boolean_t is_last)
{
- struct timeval tv = {
- 0,
- };
-
cld->cld_type = CHANGELOG_TYPE_ROLLOVER;
-
- if (gettimeofday(&tv, NULL))
- return -1;
-
- cld->cld_roll_time = (unsigned long)tv.tv_sec;
+ cld->cld_roll_time = gf_time();
cld->cld_finale = is_last;
- return 0;
}
int
@@ -1074,11 +1044,10 @@ changelog_snap_handle_ascii_change(xlator_t *this, changelog_log_data_t *cld)
ret = changelog_snap_write_change(priv, buffer, off);
if (ret < 0) {
- gf_msg(this->name, GF_LOG_ERROR, 0, CHANGELOG_MSG_WRITE_FAILED,
- "error writing csnap to disk");
+ gf_smsg(this->name, GF_LOG_ERROR, 0, CHANGELOG_MSG_WRITE_FAILED,
+ "csnap", NULL);
}
- gf_msg(this->name, GF_LOG_INFO, 0, CHANGELOG_MSG_SNAP_INFO,
- "Successfully wrote to csnap");
+ gf_smsg(this->name, GF_LOG_INFO, 0, CHANGELOG_MSG_WROTE_TO_CSNAP, NULL);
ret = 0;
out:
return ret;
@@ -1095,9 +1064,8 @@ changelog_handle_change(xlator_t *this, changelog_priv_t *priv,
ret = changelog_start_next_change(this, priv, cld->cld_roll_time,
cld->cld_finale);
if (ret)
- gf_msg(this->name, GF_LOG_ERROR, 0,
- CHANGELOG_MSG_GET_TIME_OP_FAILED,
- "Problem rolling over changelog(s)");
+ gf_smsg(this->name, GF_LOG_ERROR, 0,
+ CHANGELOG_MSG_GET_TIME_OP_FAILED, NULL);
goto out;
}
@@ -1111,16 +1079,16 @@ changelog_handle_change(xlator_t *this, changelog_priv_t *priv,
if (CHANGELOG_TYPE_IS_FSYNC(cld->cld_type)) {
ret = sys_fsync(priv->changelog_fd);
if (ret < 0) {
- gf_msg(this->name, GF_LOG_ERROR, errno,
- CHANGELOG_MSG_FSYNC_OP_FAILED, "fsync failed");
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ CHANGELOG_MSG_FSYNC_OP_FAILED, NULL);
}
goto out;
}
ret = priv->ce->encode(this, cld);
if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, CHANGELOG_MSG_WRITE_FAILED,
- "error writing changelog to disk");
+ gf_smsg(this->name, GF_LOG_ERROR, 0, CHANGELOG_MSG_WRITE_FAILED,
+ "changelog", NULL);
}
out:
@@ -1143,6 +1111,7 @@ changelog_local_init(xlator_t *this, inode_t *inode, uuid_t gfid,
gf_msg_callingfn(this->name, GF_LOG_WARNING, 0,
CHANGELOG_MSG_INODE_NOT_FOUND,
"inode needed for version checking !!!");
+
goto out;
}
@@ -1211,7 +1180,7 @@ changelog_drain_black_fops(xlator_t *this, changelog_priv_t *priv)
ret = pthread_mutex_lock(&priv->dm.drain_black_mutex);
if (ret)
gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_PTHREAD_ERROR,
- "pthread error", "error=%d", ret, NULL);
+ "error=%d", ret, NULL);
while (priv->dm.black_fop_cnt > 0) {
gf_msg_debug(this->name, 0, "Conditional wait on black fops: %ld",
priv->dm.black_fop_cnt);
@@ -1220,14 +1189,14 @@ changelog_drain_black_fops(xlator_t *this, changelog_priv_t *priv)
&priv->dm.drain_black_mutex);
if (ret)
gf_smsg(this->name, GF_LOG_ERROR, errno,
- CHANGELOG_MSG_PTHREAD_COND_WAIT_FAILED,
- "pthread cond wait failed", "error=%d", ret, NULL);
+ CHANGELOG_MSG_PTHREAD_COND_WAIT_FAILED, "error=%d", ret,
+ NULL);
}
priv->dm.drain_wait_black = _gf_false;
ret = pthread_mutex_unlock(&priv->dm.drain_black_mutex);
if (ret)
gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_PTHREAD_ERROR,
- "pthread error", "error=%d", ret, NULL);
+ "error=%d", ret, NULL);
pthread_cleanup_pop(0);
gf_msg_debug(this->name, 0, "Woke up: Conditional wait on black fops");
}
@@ -1247,7 +1216,7 @@ changelog_drain_white_fops(xlator_t *this, changelog_priv_t *priv)
ret = pthread_mutex_lock(&priv->dm.drain_white_mutex);
if (ret)
gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_PTHREAD_ERROR,
- "pthread error", "error=%d", ret, NULL);
+ "error=%d", ret, NULL);
while (priv->dm.white_fop_cnt > 0) {
gf_msg_debug(this->name, 0, "Conditional wait on white fops : %ld",
priv->dm.white_fop_cnt);
@@ -1256,14 +1225,14 @@ changelog_drain_white_fops(xlator_t *this, changelog_priv_t *priv)
&priv->dm.drain_white_mutex);
if (ret)
gf_smsg(this->name, GF_LOG_ERROR, errno,
- CHANGELOG_MSG_PTHREAD_COND_WAIT_FAILED,
- "pthread cond wait failed", "error=%d", ret, NULL);
+ CHANGELOG_MSG_PTHREAD_COND_WAIT_FAILED, "error=%d", ret,
+ NULL);
}
priv->dm.drain_wait_white = _gf_false;
ret = pthread_mutex_unlock(&priv->dm.drain_white_mutex);
if (ret)
gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_PTHREAD_ERROR,
- "pthread error", "error=%d", ret, NULL);
+ "error=%d", ret, NULL);
pthread_cleanup_pop(0);
gf_msg_debug(this->name, 0, "Woke up: Conditional wait on white fops");
}
@@ -1292,7 +1261,7 @@ changelog_rollover(void *data)
while (1) {
(void)pthread_testcancel();
- tv.tv_sec = time(NULL) + priv->rollover_time;
+ tv.tv_sec = gf_time() + priv->rollover_time;
tv.tv_nsec = 0;
ret = 0; /* Reset ret to zero */
@@ -1315,12 +1284,12 @@ changelog_rollover(void *data)
pthread_cleanup_pop(0);
if (ret == 0) {
- gf_msg(this->name, GF_LOG_INFO, 0, CHANGELOG_MSG_BARRIER_INFO,
- "Explicit wakeup on barrier notify");
+ gf_smsg(this->name, GF_LOG_INFO, 0, CHANGELOG_MSG_BARRIER_INFO,
+ NULL);
priv->explicit_rollover = _gf_true;
} else if (ret && ret != ETIMEDOUT) {
- gf_msg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_SELECT_FAILED,
- "pthread_cond_timedwait failed");
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ CHANGELOG_MSG_SELECT_FAILED, NULL);
continue;
} else if (ret && ret == ETIMEDOUT) {
gf_msg_debug(this->name, 0, "Wokeup on timeout");
@@ -1373,13 +1342,7 @@ changelog_rollover(void *data)
if (priv->explicit_rollover == _gf_true)
sleep(1);
- ret = changelog_fill_rollover_data(&cld, _gf_false);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0,
- CHANGELOG_MSG_GET_TIME_OP_FAILED,
- "failed to fill rollover data");
- continue;
- }
+ changelog_fill_rollover_data(&cld, _gf_false);
_mask_cancellation();
@@ -1427,9 +1390,8 @@ changelog_fsync_thread(void *data)
ret = changelog_inject_single_event(this, priv, &cld);
if (ret)
- gf_msg(this->name, GF_LOG_ERROR, 0,
- CHANGELOG_MSG_INJECT_FSYNC_FAILED,
- "failed to inject fsync event");
+ gf_smsg(this->name, GF_LOG_ERROR, 0,
+ CHANGELOG_MSG_INJECT_FSYNC_FAILED, NULL);
_unmask_cancellation();
}
@@ -1851,23 +1813,21 @@ changelog_fill_entry_buf(call_frame_t *frame, xlator_t *this, loc_t *loc,
parent = inode_parent(loc->inode, 0, 0);
if (!parent) {
gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_INODE_NOT_FOUND,
- "Parent inode not found", "gfid=%s",
- uuid_utoa(loc->inode->gfid), NULL);
+ "type=parent", "gfid=%s", uuid_utoa(loc->inode->gfid), NULL);
goto err;
}
CHANGELOG_INIT_NOCHECK(this, *local, loc->inode, loc->inode->gfid, 5);
if (!(*local)) {
- gf_msg(this->name, GF_LOG_ERROR, 0, CHANGELOG_MSG_LOCAL_INIT_FAILED,
- "changelog local"
- " initiatilization failed");
+ gf_smsg(this->name, GF_LOG_ERROR, 0, CHANGELOG_MSG_LOCAL_INIT_FAILED,
+ NULL);
goto err;
}
co = changelog_get_usable_buffer(*local);
if (!co) {
- gf_msg(this->name, GF_LOG_ERROR, 0, CHANGELOG_MSG_NO_MEMORY,
- "Failed to get buffer");
+ gf_smsg(this->name, GF_LOG_ERROR, 0, CHANGELOG_MSG_GET_BUFFER_FAILED,
+ NULL);
goto err;
}
diff --git a/xlators/features/changelog/src/changelog-helpers.h b/xlators/features/changelog/src/changelog-helpers.h
index 10d457e8cf5..38fa7590c32 100644
--- a/xlators/features/changelog/src/changelog-helpers.h
+++ b/xlators/features/changelog/src/changelog-helpers.h
@@ -11,14 +11,14 @@
#ifndef _CHANGELOG_HELPERS_H
#define _CHANGELOG_HELPERS_H
-#include "locking.h"
-#include "timer.h"
+#include <glusterfs/locking.h>
+#include <glusterfs/timer.h>
#include "pthread.h"
-#include "iobuf.h"
-#include "rot-buffs.h"
+#include <glusterfs/iobuf.h>
+#include <glusterfs/rot-buffs.h>
#include "changelog-misc.h"
-#include "call-stub.h"
+#include <glusterfs/call-stub.h>
#include "rpcsvc.h"
#include "changelog-ev-handle.h"
@@ -31,7 +31,7 @@
*/
typedef struct changelog_log_data {
/* rollover related */
- unsigned long cld_roll_time;
+ time_t cld_roll_time;
/* reopen changelog? */
gf_boolean_t cld_finale;
@@ -97,12 +97,6 @@ struct changelog_encoder {
typedef struct changelog_time_slice {
/**
- * just in case we need nanosecond granularity some day.
- * field is unused as of now (maybe we'd need it later).
- */
- struct timeval tv_start;
-
- /**
* version of changelog file, incremented each time changes
* rollover.
*/
@@ -190,8 +184,12 @@ typedef struct changelog_ev_selector {
/* changelog's private structure */
struct changelog_priv {
+ /* changelog journalling */
gf_boolean_t active;
+ /* changelog live notifications */
+ gf_boolean_t rpc_active;
+
/* to generate unique socket file per brick */
char *changelog_brick;
@@ -307,6 +305,24 @@ struct changelog_priv {
/* glusterfind dependency to capture paths on deleted entries*/
gf_boolean_t capture_del_path;
+
+ /* Save total no. of listners */
+ gf_atomic_t listnercnt;
+
+ /* Save total no. of xprt are associated with listner */
+ gf_atomic_t xprtcnt;
+
+ /* Save xprt list */
+ struct list_head xprt_list;
+
+ /* Save total no. of client connection */
+ gf_atomic_t clntcnt;
+
+ /* Save cleanup brick in victim */
+ xlator_t *victim;
+
+ /* Status to save cleanup notify status */
+ gf_boolean_t notify_down;
};
struct changelog_local {
@@ -401,11 +417,11 @@ changelog_local_t *
changelog_local_init(xlator_t *this, inode_t *inode, uuid_t gfid,
int xtra_records, gf_boolean_t update_flag);
int
-changelog_start_next_change(xlator_t *this, changelog_priv_t *priv,
- unsigned long ts, gf_boolean_t finale);
+changelog_start_next_change(xlator_t *this, changelog_priv_t *priv, time_t ts,
+ gf_boolean_t finale);
int
changelog_open_journal(xlator_t *this, changelog_priv_t *priv);
-int
+void
changelog_fill_rollover_data(changelog_log_data_t *cld, gf_boolean_t is_last);
int
changelog_inject_single_event(xlator_t *this, changelog_priv_t *priv,
@@ -429,12 +445,11 @@ changelog_fsync_thread(void *data);
int
changelog_forget(xlator_t *this, inode_t *inode);
int
-htime_update(xlator_t *this, changelog_priv_t *priv, unsigned long ts,
- char *buffer);
+htime_update(xlator_t *this, changelog_priv_t *priv, time_t ts, char *buffer);
int
-htime_open(xlator_t *this, changelog_priv_t *priv, unsigned long ts);
+htime_open(xlator_t *this, changelog_priv_t *priv, time_t ts);
int
-htime_create(xlator_t *this, changelog_priv_t *priv, unsigned long ts);
+htime_create(xlator_t *this, changelog_priv_t *priv, time_t ts);
/* Geo-Rep snapshot dependency changes */
void
@@ -492,8 +507,6 @@ changelog_deselect_event(xlator_t *, changelog_ev_selector_t *, unsigned int);
int
changelog_init_event_selection(xlator_t *, changelog_ev_selector_t *);
int
-changelog_cleanup_event_selection(xlator_t *, changelog_ev_selector_t *);
-int
changelog_ev_selected(xlator_t *, changelog_ev_selector_t *, unsigned int);
void
changelog_dispatch_event(xlator_t *, changelog_priv_t *, changelog_event_t *);
@@ -656,8 +669,8 @@ resolve_pargfid_to_path(xlator_t *this, const uuid_t gfid, char **path,
#define CHANGELOG_NOT_ON_THEN_GOTO(priv, ret, label) \
do { \
if (!priv->active) { \
- gf_msg(this->name, GF_LOG_WARNING, 0, CHANGELOG_MSG_NOT_ACTIVE, \
- "Changelog is not active, return success"); \
+ gf_smsg(this->name, GF_LOG_WARNING, 0, \
+ CHANGELOG_MSG_CHANGELOG_NOT_ACTIVE, NULL); \
ret = 0; \
goto label; \
} \
@@ -668,7 +681,7 @@ resolve_pargfid_to_path(xlator_t *this, const uuid_t gfid, char **path,
do { \
if (ret) { \
gf_smsg(this->name, GF_LOG_ERROR, 0, CHANGELOG_MSG_PTHREAD_ERROR, \
- "pthread error", "error=%d", ret, NULL); \
+ "error=%d", ret, NULL); \
ret = -1; \
goto label; \
} \
@@ -679,7 +692,7 @@ resolve_pargfid_to_path(xlator_t *this, const uuid_t gfid, char **path,
do { \
if (ret) { \
gf_smsg(this->name, GF_LOG_ERROR, 0, CHANGELOG_MSG_PTHREAD_ERROR, \
- "pthread error", "error=%d", ret, NULL); \
+ "error=%d", ret, NULL); \
ret = -1; \
flag = _gf_true; \
goto label; \
@@ -691,7 +704,7 @@ resolve_pargfid_to_path(xlator_t *this, const uuid_t gfid, char **path,
do { \
if (ret) { \
gf_smsg(this->name, GF_LOG_ERROR, 0, CHANGELOG_MSG_PTHREAD_ERROR, \
- "pthread error", "error=%d", ret, NULL); \
+ "error=%d", ret, NULL); \
ret = -1; \
pthread_mutex_unlock(&mutex); \
goto label; \
diff --git a/xlators/features/changelog/src/changelog-mem-types.h b/xlators/features/changelog/src/changelog-mem-types.h
index 1e3786c6298..a2d8a9cbe93 100644
--- a/xlators/features/changelog/src/changelog-mem-types.h
+++ b/xlators/features/changelog/src/changelog-mem-types.h
@@ -11,7 +11,7 @@
#ifndef _CHANGELOG_MEM_TYPES_H
#define _CHANGELOG_MEM_TYPES_H
-#include "mem-types.h"
+#include <glusterfs/mem-types.h>
enum gf_changelog_mem_types {
gf_changelog_mt_priv_t = gf_common_mt_end + 1,
diff --git a/xlators/features/changelog/src/changelog-messages.h b/xlators/features/changelog/src/changelog-messages.h
index dbf133ec836..cb0e16c85d8 100644
--- a/xlators/features/changelog/src/changelog-messages.h
+++ b/xlators/features/changelog/src/changelog-messages.h
@@ -11,7 +11,7 @@
#ifndef _CHANGELOG_MESSAGES_H_
#define _CHANGELOG_MESSAGES_H_
-#include "glfs-message-id.h"
+#include <glusterfs/glfs-message-id.h>
/* To add new message IDs, append new identifiers at the end of the list.
*
@@ -24,7 +24,7 @@
*/
GLFS_MSGID(
- CHANGELOG, CHANGELOG_MSG_OPEN_FAILED, CHANGELOG_MSG_NO_MEMORY,
+ CHANGELOG, CHANGELOG_MSG_OPEN_FAILED, CHANGELOG_MSG_BARRIER_FOP_FAILED,
CHANGELOG_MSG_VOL_MISCONFIGURED, CHANGELOG_MSG_RENAME_ERROR,
CHANGELOG_MSG_READ_ERROR, CHANGELOG_MSG_HTIME_ERROR,
CHANGELOG_MSG_PTHREAD_MUTEX_INIT_FAILED,
@@ -37,11 +37,11 @@ GLFS_MSGID(
CHANGELOG_MSG_FSYNC_OP_FAILED, CHANGELOG_MSG_TOTAL_LOG_INFO,
CHANGELOG_MSG_SNAP_INFO, CHANGELOG_MSG_SELECT_FAILED,
CHANGELOG_MSG_FCNTL_FAILED, CHANGELOG_MSG_BNOTIFY_INFO,
- CHANGELOG_MSG_ENTRY_BUF_INFO, CHANGELOG_MSG_NOT_ACTIVE,
+ CHANGELOG_MSG_ENTRY_BUF_INFO, CHANGELOG_MSG_CHANGELOG_NOT_ACTIVE,
CHANGELOG_MSG_LOCAL_INIT_FAILED, CHANGELOG_MSG_NOTIFY_REGISTER_FAILED,
CHANGELOG_MSG_PROGRAM_NAME_REG_FAILED, CHANGELOG_MSG_HANDLE_PROBE_ERROR,
CHANGELOG_MSG_SET_FD_CONTEXT, CHANGELOG_MSG_FREEUP_FAILED,
- CHANGELOG_MSG_HTIME_INFO, CHANGELOG_MSG_RPC_SUBMIT_REPLY_FAILED,
+ CHANGELOG_MSG_RECONFIGURE, CHANGELOG_MSG_RPC_SUBMIT_REPLY_FAILED,
CHANGELOG_MSG_RPC_BUILD_ERROR, CHANGELOG_MSG_RPC_CONNECT_ERROR,
CHANGELOG_MSG_RPC_START_ERROR, CHANGELOG_MSG_BUFFER_STARVATION_ERROR,
CHANGELOG_MSG_SCAN_DIR_FAILED, CHANGELOG_MSG_FSETXATTR_FAILED,
@@ -52,6 +52,121 @@ GLFS_MSGID(
CHANGELOG_MSG_FSTAT_OP_FAILED, CHANGELOG_MSG_LSEEK_OP_FAILED,
CHANGELOG_MSG_STRSTR_OP_FAILED, CHANGELOG_MSG_UNLINK_OP_FAILED,
CHANGELOG_MSG_DETECT_EMPTY_CHANGELOG_FAILED,
- CHANGELOG_MSG_READLINK_OP_FAILED, CHANGELOG_MSG_EXPLICIT_ROLLOVER_FAILED);
+ CHANGELOG_MSG_READLINK_OP_FAILED, CHANGELOG_MSG_EXPLICIT_ROLLOVER_FAILED,
+ CHANGELOG_MSG_RPCSVC_NOTIFY_FAILED, CHANGELOG_MSG_MEMORY_INIT_FAILED,
+ CHANGELOG_MSG_NO_MEMORY, CHANGELOG_MSG_HTIME_STAT_ERROR,
+ CHANGELOG_MSG_HTIME_CURRENT_ERROR, CHANGELOG_MSG_BNOTIFY_COND_INFO,
+ CHANGELOG_MSG_NO_HTIME_CURRENT, CHANGELOG_MSG_HTIME_CURRENT,
+ CHANGELOG_MSG_NEW_HTIME_FILE, CHANGELOG_MSG_MKDIR_ERROR,
+ CHANGELOG_MSG_PATH_NOT_FOUND, CHANGELOG_MSG_XATTR_INIT_FAILED,
+ CHANGELOG_MSG_WROTE_TO_CSNAP, CHANGELOG_MSG_UNUSED_0,
+ CHANGELOG_MSG_GET_BUFFER_FAILED, CHANGELOG_MSG_BARRIER_STATE_NOTIFY,
+ CHANGELOG_MSG_BARRIER_DISABLED, CHANGELOG_MSG_BARRIER_ALREADY_DISABLED,
+ CHANGELOG_MSG_BARRIER_ON_ERROR, CHANGELOG_MSG_BARRIER_ENABLE,
+ CHANGELOG_MSG_BARRIER_KEY_NOT_FOUND, CHANGELOG_MSG_ERROR_IN_DICT_GET,
+ CHANGELOG_MSG_UNUSED_1, CHANGELOG_MSG_UNUSED_2,
+ CHANGELOG_MSG_DEQUEUING_BARRIER_FOPS,
+ CHANGELOG_MSG_DEQUEUING_BARRIER_FOPS_FINISHED,
+ CHANGELOG_MSG_BARRIER_TIMEOUT, CHANGELOG_MSG_TIMEOUT_ADD_FAILED,
+ CHANGELOG_MSG_CLEANUP_ALREADY_SET);
+#define CHANGELOG_MSG_BARRIER_FOP_FAILED_STR \
+ "failed to barrier FOPs, disabling changelog barrier"
+#define CHANGELOG_MSG_MEMORY_INIT_FAILED_STR "memory accounting init failed"
+#define CHANGELOG_MSG_NO_MEMORY_STR "failed to create local memory pool"
+#define CHANGELOG_MSG_ENTRY_BUF_INFO_STR \
+ "Entry cannot be captured for gfid, Capturing DATA entry."
+#define CHANGELOG_MSG_PTHREAD_ERROR_STR "pthread error"
+#define CHANGELOG_MSG_PTHREAD_MUTEX_INIT_FAILED_STR "pthread_mutex_init failed"
+#define CHANGELOG_MSG_PTHREAD_COND_INIT_FAILED_STR "pthread_cond_init failed"
+#define CHANGELOG_MSG_HTIME_ERROR_STR "failed to update HTIME file"
+#define CHANGELOG_MSG_HTIME_STAT_ERROR_STR "unable to stat htime file"
+#define CHANGELOG_MSG_HTIME_CURRENT_ERROR_STR "Error extracting HTIME_CURRENT."
+#define CHANGELOG_MSG_UNLINK_OP_FAILED_STR "error unlinking empty changelog"
+#define CHANGELOG_MSG_RENAME_ERROR_STR "error renaming"
+#define CHANGELOG_MSG_MKDIR_ERROR_STR "unable to create directory"
+#define CHANGELOG_MSG_BNOTIFY_INFO_STR \
+ "Explicit rollover changelog signaling bnotify"
+#define CHANGELOG_MSG_BNOTIFY_COND_INFO_STR "Woke up: bnotify conditional wait"
+#define CHANGELOG_MSG_RECONFIGURE_STR "Reconfigure: Changelog Enable"
+#define CHANGELOG_MSG_NO_HTIME_CURRENT_STR \
+ "HTIME_CURRENT not found. Changelog enabled before init"
+#define CHANGELOG_MSG_HTIME_CURRENT_STR "HTIME_CURRENT"
+#define CHANGELOG_MSG_NEW_HTIME_FILE_STR \
+ "Changelog enable: Creating new HTIME file"
+#define CHANGELOG_MSG_FGETXATTR_FAILED_STR "fgetxattr failed"
+#define CHANGELOG_MSG_TOTAL_LOG_INFO_STR "changelog info"
+#define CHANGELOG_MSG_PTHREAD_COND_WAIT_FAILED_STR "pthread cond wait failed"
+#define CHANGELOG_MSG_INODE_NOT_FOUND_STR "inode not found"
+#define CHANGELOG_MSG_READLINK_OP_FAILED_STR \
+ "could not read the link from the gfid handle"
+#define CHANGELOG_MSG_OPEN_FAILED_STR "unable to open file"
+#define CHANGELOG_MSG_RPC_CONNECT_ERROR_STR "failed to connect back"
+#define CHANGELOG_MSG_BUFFER_STARVATION_ERROR_STR \
+ "Failed to get buffer for RPC dispatch"
+#define CHANGELOG_MSG_PTHREAD_CANCEL_FAILED_STR "could not cancel thread"
+#define CHANGELOG_MSG_FSTAT_OP_FAILED_STR "Could not stat (CHANGELOG)"
+#define CHANGELOG_MSG_LSEEK_OP_FAILED_STR "Could not lseek (changelog)"
+#define CHANGELOG_MSG_PATH_NOT_FOUND_STR \
+ "Could not find CHANGELOG in changelog path"
+#define CHANGELOG_MSG_FSYNC_OP_FAILED_STR "fsync failed"
+#define CHANGELOG_MSG_DETECT_EMPTY_CHANGELOG_FAILED_STR \
+ "Error detecting empty changelog"
+#define CHANGELOG_MSG_EXPLICIT_ROLLOVER_FAILED_STR \
+ "Fail snapshot because of previous errors"
+#define CHANGELOG_MSG_SCAN_DIR_FAILED_STR "scandir failed"
+#define CHANGELOG_MSG_FSETXATTR_FAILED_STR "fsetxattr failed"
+#define CHANGELOG_MSG_XATTR_INIT_FAILED_STR "Htime xattr initialization failed"
+#define CHANGELOG_MSG_SNAP_INFO_STR "log in call path"
+#define CHANGELOG_MSG_WRITE_FAILED_STR "error writing to disk"
+#define CHANGELOG_MSG_WROTE_TO_CSNAP_STR "Successfully wrote to csnap"
+#define CHANGELOG_MSG_GET_TIME_OP_FAILED_STR "Problem rolling over changelog(s)"
+#define CHANGELOG_MSG_BARRIER_INFO_STR "Explicit wakeup on barrier notify"
+#define CHANGELOG_MSG_SELECT_FAILED_STR "pthread_cond_timedwait failed"
+#define CHANGELOG_MSG_INJECT_FSYNC_FAILED_STR "failed to inject fsync event"
+#define CHANGELOG_MSG_LOCAL_INIT_FAILED_STR \
+ "changelog local initialization failed"
+#define CHANGELOG_MSG_GET_BUFFER_FAILED_STR "Failed to get buffer"
+#define CHANGELOG_MSG_SET_FD_CONTEXT_STR \
+ "could not set fd context(for release cbk)"
+#define CHANGELOG_MSG_DICT_GET_FAILED_STR "Barrier failed"
+#define CHANGELOG_MSG_BARRIER_STATE_NOTIFY_STR "Barrier notification"
+#define CHANGELOG_MSG_BARRIER_ERROR_STR \
+ "Received another barrier off notification while already off"
+#define CHANGELOG_MSG_BARRIER_DISABLED_STR "disabled changelog barrier"
+#define CHANGELOG_MSG_BARRIER_ALREADY_DISABLED_STR \
+ "Changelog barrier already disabled"
+#define CHANGELOG_MSG_BARRIER_ON_ERROR_STR \
+ "Received another barrier on notification when last one is not served yet"
+#define CHANGELOG_MSG_BARRIER_ENABLE_STR "Enabled changelog barrier"
+#define CHANGELOG_MSG_BARRIER_KEY_NOT_FOUND_STR "barrier key not found"
+#define CHANGELOG_MSG_ERROR_IN_DICT_GET_STR \
+ "Something went wrong in dict_get_str_boolean"
+#define CHANGELOG_MSG_DIR_OPTIONS_NOT_SET_STR "changelog-dir option is not set"
+#define CHANGELOG_MSG_FREEUP_FAILED_STR "could not cleanup bootstrapper"
+#define CHANGELOG_MSG_CHILD_MISCONFIGURED_STR \
+ "translator needs a single subvolume"
+#define CHANGELOG_MSG_VOL_MISCONFIGURED_STR \
+ "dangling volume. please check volfile"
+#define CHANGELOG_MSG_DEQUEUING_BARRIER_FOPS_STR \
+ "Dequeuing all the changelog barriered fops"
+#define CHANGELOG_MSG_DEQUEUING_BARRIER_FOPS_FINISHED_STR \
+ "Dequeuing changelog barriered fops is finished"
+#define CHANGELOG_MSG_BARRIER_TIMEOUT_STR \
+ "Disabling changelog barrier because of the timeout"
+#define CHANGELOG_MSG_TIMEOUT_ADD_FAILED_STR \
+ "Couldn't add changelog barrier timeout event"
+#define CHANGELOG_MSG_RPC_BUILD_ERROR_STR "failed to build rpc options"
+#define CHANGELOG_MSG_NOTIFY_REGISTER_FAILED_STR "failed to register notify"
+#define CHANGELOG_MSG_RPC_START_ERROR_STR "failed to start rpc"
+#define CHANGELOG_MSG_CREATE_FRAME_FAILED_STR "failed to create frame"
+#define CHANGELOG_MSG_RPC_SUBMIT_REPLY_FAILED_STR "failed to serialize reply"
+#define CHANGELOG_MSG_PROGRAM_NAME_REG_FAILED_STR "cannot register program"
+#define CHANGELOG_MSG_CHANGELOG_NOT_ACTIVE_STR \
+ "Changelog is not active, return success"
+#define CHANGELOG_MSG_PUT_BUFFER_FAILED_STR \
+ "failed to put buffer after consumption"
+#define CHANGELOG_MSG_CLEANUP_ALREADY_SET_STR \
+ "cleanup_starting flag is already set for xl"
+#define CHANGELOG_MSG_HANDLE_PROBE_ERROR_STR "xdr decoding error"
#endif /* !_CHANGELOG_MESSAGES_H_ */
diff --git a/xlators/features/changelog/src/changelog-misc.h b/xlators/features/changelog/src/changelog-misc.h
index 04d1bdeba03..e2addc09414 100644
--- a/xlators/features/changelog/src/changelog-misc.h
+++ b/xlators/features/changelog/src/changelog-misc.h
@@ -11,8 +11,8 @@
#ifndef _CHANGELOG_MISC_H
#define _CHANGELOG_MISC_H
-#include "glusterfs.h"
-#include "common-utils.h"
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/common-utils.h>
#define CHANGELOG_MAX_TYPE 4
#define CHANGELOG_FILE_NAME "CHANGELOG"
diff --git a/xlators/features/changelog/src/changelog-rpc-common.c b/xlators/features/changelog/src/changelog-rpc-common.c
index 91d6581836a..125246a17e1 100644
--- a/xlators/features/changelog/src/changelog-rpc-common.c
+++ b/xlators/features/changelog/src/changelog-rpc-common.c
@@ -11,7 +11,7 @@
#include "changelog-rpc-common.h"
#include "changelog-messages.h"
-#include "syscall.h"
+#include <glusterfs/syscall.h>
/**
*****************************************************
Client Interface
@@ -28,7 +28,7 @@ changelog_rpc_poller(void *arg)
{
xlator_t *this = arg;
- (void)event_dispatch(this->ctx->event_pool);
+ (void)gf_event_dispatch(this->ctx->event_pool);
return NULL;
}
@@ -47,10 +47,10 @@ changelog_rpc_client_init(xlator_t *this, void *cbkdata, char *sockfile,
if (!options)
goto error_return;
- ret = rpc_transport_unix_options_build(&options, sockfile, 0);
+ ret = rpc_transport_unix_options_build(options, sockfile, 0);
if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, CHANGELOG_MSG_RPC_BUILD_ERROR,
- "failed to build rpc options");
+ gf_smsg(this->name, GF_LOG_ERROR, 0, CHANGELOG_MSG_RPC_BUILD_ERROR,
+ NULL);
goto dealloc_dict;
}
@@ -60,19 +60,19 @@ changelog_rpc_client_init(xlator_t *this, void *cbkdata, char *sockfile,
ret = rpc_clnt_register_notify(rpc, fn, cbkdata);
if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0,
- CHANGELOG_MSG_NOTIFY_REGISTER_FAILED,
- "failed to register notify");
+ gf_smsg(this->name, GF_LOG_ERROR, 0,
+ CHANGELOG_MSG_NOTIFY_REGISTER_FAILED, NULL);
goto dealloc_rpc_clnt;
}
ret = rpc_clnt_start(rpc);
if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, CHANGELOG_MSG_RPC_START_ERROR,
- "failed to start rpc");
+ gf_smsg(this->name, GF_LOG_ERROR, 0, CHANGELOG_MSG_RPC_START_ERROR,
+ NULL);
goto dealloc_rpc_clnt;
}
+ dict_unref(options);
return rpc;
dealloc_rpc_clnt:
@@ -164,8 +164,8 @@ changelog_invoke_rpc(xlator_t *this, struct rpc_clnt *rpc,
frame = create_frame(this, this->ctx->pool);
if (!frame) {
- gf_msg(this->name, GF_LOG_ERROR, 0, CHANGELOG_MSG_CREATE_FRAME_FAILED,
- "failed to create frame");
+ gf_smsg(this->name, GF_LOG_ERROR, 0, CHANGELOG_MSG_CREATE_FRAME_FAILED,
+ NULL);
goto error_return;
}
@@ -238,8 +238,8 @@ changelog_rpc_sumbit_reply(rpcsvc_request_t *req, void *arg,
iob = __changelog_rpc_serialize_reply(req, arg, &iov, xdrproc);
if (!iob)
- gf_msg("", GF_LOG_ERROR, 0, CHANGELOG_MSG_RPC_SUBMIT_REPLY_FAILED,
- "failed to serialize reply");
+ gf_smsg("", GF_LOG_ERROR, 0, CHANGELOG_MSG_RPC_SUBMIT_REPLY_FAILED,
+ NULL);
else
iobref_add(iobref, iob);
@@ -260,6 +260,10 @@ changelog_rpc_server_destroy(xlator_t *this, rpcsvc_t *rpc, char *sockfile,
rpcsvc_listener_t *listener = NULL;
rpcsvc_listener_t *next = NULL;
struct rpcsvc_program *prog = NULL;
+ rpc_transport_t *trans = NULL;
+
+ if (!rpc)
+ return;
while (*progs) {
prog = *progs;
@@ -269,22 +273,25 @@ changelog_rpc_server_destroy(xlator_t *this, rpcsvc_t *rpc, char *sockfile,
list_for_each_entry_safe(listener, next, &rpc->listeners, list)
{
- rpcsvc_listener_destroy(listener);
+ if (listener->trans) {
+ trans = listener->trans;
+ rpc_transport_disconnect(trans, _gf_false);
+ }
}
(void)rpcsvc_unregister_notify(rpc, fn, this);
- sys_unlink(sockfile);
- if (rpc->rxpool) {
- mem_pool_destroy(rpc->rxpool);
- rpc->rxpool = NULL;
- }
/* TODO Avoid freeing rpc object in case of brick multiplex
after freeing rpc object svc->rpclock corrupted and it takes
more time to detach a brick
*/
- if (!this->cleanup_starting)
+ if (!this->cleanup_starting) {
+ if (rpc->rxpool) {
+ mem_pool_destroy(rpc->rxpool);
+ rpc->rxpool = NULL;
+ }
GF_FREE(rpc);
+ }
}
rpcsvc_t *
@@ -299,22 +306,25 @@ changelog_rpc_server_init(xlator_t *this, char *sockfile, void *cbkdata,
if (!cbkdata)
cbkdata = this;
- ret = rpcsvc_transport_unix_options_build(&options, sockfile);
+ options = dict_new();
+ if (!options)
+ return NULL;
+
+ ret = rpcsvc_transport_unix_options_build(options, sockfile);
if (ret)
goto dealloc_dict;
rpc = rpcsvc_init(this, this->ctx, options, 8);
if (rpc == NULL) {
- gf_msg(this->name, GF_LOG_ERROR, 0, CHANGELOG_MSG_RPC_START_ERROR,
- "failed to init rpc");
+ gf_smsg(this->name, GF_LOG_ERROR, 0, CHANGELOG_MSG_RPC_START_ERROR,
+ NULL);
goto dealloc_dict;
}
ret = rpcsvc_register_notify(rpc, fn, cbkdata);
if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0,
- CHANGELOG_MSG_NOTIFY_REGISTER_FAILED,
- "failed to register notify function");
+ gf_smsg(this->name, GF_LOG_ERROR, 0,
+ CHANGELOG_MSG_NOTIFY_REGISTER_FAILED, NULL);
goto dealloc_rpc;
}
@@ -328,11 +338,10 @@ changelog_rpc_server_init(xlator_t *this, char *sockfile, void *cbkdata,
prog = *progs;
ret = rpcsvc_program_register(rpc, prog, _gf_false);
if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0,
- CHANGELOG_MSG_PROGRAM_NAME_REG_FAILED,
- "cannot register program "
- "(name: %s, prognum: %d, pogver: %d)",
- prog->progname, prog->prognum, prog->progver);
+ gf_smsg(this->name, GF_LOG_ERROR, 0,
+ CHANGELOG_MSG_PROGRAM_NAME_REG_FAILED, "name%s",
+ prog->progname, "prognum=%d", prog->prognum, "pogver=%d",
+ prog->progver, NULL);
goto dealloc_rpc;
}
diff --git a/xlators/features/changelog/src/changelog-rpc-common.h b/xlators/features/changelog/src/changelog-rpc-common.h
index 2d3f06e60c0..4d9aa2c694b 100644
--- a/xlators/features/changelog/src/changelog-rpc-common.h
+++ b/xlators/features/changelog/src/changelog-rpc-common.h
@@ -13,8 +13,8 @@
#include "rpcsvc.h"
#include "rpc-clnt.h"
-#include "gf-event.h"
-#include "call-stub.h"
+#include <glusterfs/gf-event.h>
+#include <glusterfs/call-stub.h>
#include "changelog-xdr.h"
#include "xdr-generic.h"
diff --git a/xlators/features/changelog/src/changelog-rpc.c b/xlators/features/changelog/src/changelog-rpc.c
index 828f85e8e45..440b88091a6 100644
--- a/xlators/features/changelog/src/changelog-rpc.c
+++ b/xlators/features/changelog/src/changelog-rpc.c
@@ -8,12 +8,12 @@
cases as published by the Free Software Foundation.
*/
-#include "syscall.h"
+#include <glusterfs/syscall.h>
#include "changelog-rpc.h"
#include "changelog-mem-types.h"
#include "changelog-ev-handle.h"
-struct rpcsvc_program *changelog_programs[];
+static struct rpcsvc_program *changelog_programs[];
static void
changelog_cleanup_dispatchers(xlator_t *this, changelog_priv_t *priv, int count)
@@ -43,9 +43,6 @@ changelog_cleanup_rpc_threads(xlator_t *this, changelog_priv_t *priv)
/** terminate dispatcher thread(s) */
changelog_cleanup_dispatchers(this, priv, priv->nr_dispatchers);
- /* TODO: what about pending and waiting connections? */
- changelog_ev_cleanup_connections(this, conn);
-
/* destroy locks */
ret = pthread_mutex_destroy(&conn->pending_lock);
if (ret != 0)
@@ -72,9 +69,6 @@ changelog_init_rpc_threads(xlator_t *this, changelog_priv_t *priv, rbuf_t *rbuf,
int j = 0;
int ret = 0;
changelog_clnt_t *conn = NULL;
- char thread_name[GF_THREAD_NAMEMAX] = {
- 0,
- };
conn = &priv->connections;
@@ -114,9 +108,9 @@ changelog_init_rpc_threads(xlator_t *this, changelog_priv_t *priv, rbuf_t *rbuf,
/* spawn dispatcher threads */
for (; j < nr_dispatchers; j++) {
- snprintf(thread_name, sizeof(thread_name), "clogd%03hx", (j & 0x3ff));
ret = gf_thread_create(&priv->ev_dispatcher[j], NULL,
- changelog_ev_dispatch, conn, thread_name);
+ changelog_ev_dispatch, conn, "clogd%03hx",
+ j & 0x3ff);
if (ret != 0) {
changelog_cleanup_dispatchers(this, priv, j);
break;
@@ -147,48 +141,146 @@ int
changelog_rpcsvc_notify(rpcsvc_t *rpc, void *xl, rpcsvc_event_t event,
void *data)
{
+ xlator_t *this = NULL;
+ rpc_transport_t *trans = NULL;
+ rpc_transport_t *xprt = NULL;
+ rpc_transport_t *xp_next = NULL;
+ changelog_priv_t *priv = NULL;
+ uint64_t listnercnt = 0;
+ uint64_t xprtcnt = 0;
+ uint64_t clntcnt = 0;
+ rpcsvc_listener_t *listener = NULL;
+ rpcsvc_listener_t *next = NULL;
+ gf_boolean_t listner_found = _gf_false;
+ socket_private_t *sockpriv = NULL;
+
+ if (!xl || !data || !rpc) {
+ gf_msg_callingfn("changelog", GF_LOG_WARNING, 0,
+ CHANGELOG_MSG_RPCSVC_NOTIFY_FAILED,
+ "Calling rpc_notify without initializing");
+ goto out;
+ }
+
+ this = xl;
+ trans = data;
+ priv = this->private;
+
+ if (!priv) {
+ gf_msg_callingfn("changelog", GF_LOG_WARNING, 0,
+ CHANGELOG_MSG_RPCSVC_NOTIFY_FAILED,
+ "Calling rpc_notify without priv initializing");
+ goto out;
+ }
+
+ if (event == RPCSVC_EVENT_ACCEPT) {
+ GF_ATOMIC_INC(priv->xprtcnt);
+ LOCK(&priv->lock);
+ {
+ list_add_tail(&trans->list, &priv->xprt_list);
+ }
+ UNLOCK(&priv->lock);
+ goto out;
+ }
+
+ if (event == RPCSVC_EVENT_DISCONNECT) {
+ list_for_each_entry_safe(listener, next, &rpc->listeners, list)
+ {
+ if (listener && listener->trans) {
+ if (listener->trans == trans) {
+ listnercnt = GF_ATOMIC_DEC(priv->listnercnt);
+ listner_found = _gf_true;
+ rpcsvc_listener_destroy(listener);
+ }
+ }
+ }
+
+ if (listnercnt > 0) {
+ goto out;
+ }
+ if (listner_found) {
+ LOCK(&priv->lock);
+ list_for_each_entry_safe(xprt, xp_next, &priv->xprt_list, list)
+ {
+ sockpriv = (socket_private_t *)(xprt->private);
+ gf_log("changelog", GF_LOG_INFO,
+ "Send disconnect"
+ " on socket %d",
+ sockpriv->sock);
+ rpc_transport_disconnect(xprt, _gf_false);
+ }
+ UNLOCK(&priv->lock);
+ goto out;
+ }
+ LOCK(&priv->lock);
+ {
+ list_del_init(&trans->list);
+ }
+ UNLOCK(&priv->lock);
+
+ xprtcnt = GF_ATOMIC_DEC(priv->xprtcnt);
+ clntcnt = GF_ATOMIC_GET(priv->clntcnt);
+ if (!xprtcnt && !clntcnt) {
+ changelog_process_cleanup_event(this);
+ }
+ }
+
+out:
return 0;
}
void
+changelog_process_cleanup_event(xlator_t *this)
+{
+ gf_boolean_t cleanup_notify = _gf_false;
+ changelog_priv_t *priv = NULL;
+ char sockfile[UNIX_PATH_MAX] = {
+ 0,
+ };
+
+ if (!this)
+ return;
+ priv = this->private;
+ if (!priv)
+ return;
+
+ LOCK(&priv->lock);
+ {
+ cleanup_notify = priv->notify_down;
+ priv->notify_down = _gf_true;
+ }
+ UNLOCK(&priv->lock);
+
+ if (priv->victim && !cleanup_notify) {
+ default_notify(this, GF_EVENT_PARENT_DOWN, priv->victim);
+
+ if (priv->rpc) {
+ /* sockfile path could have been saved to avoid this */
+ CHANGELOG_MAKE_SOCKET_PATH(priv->changelog_brick, sockfile,
+ UNIX_PATH_MAX);
+ sys_unlink(sockfile);
+ (void)rpcsvc_unregister_notify(priv->rpc, changelog_rpcsvc_notify,
+ this);
+ if (priv->rpc->rxpool) {
+ mem_pool_destroy(priv->rpc->rxpool);
+ priv->rpc->rxpool = NULL;
+ }
+ GF_FREE(priv->rpc);
+ priv->rpc = NULL;
+ }
+ }
+}
+
+void
changelog_destroy_rpc_listner(xlator_t *this, changelog_priv_t *priv)
{
char sockfile[UNIX_PATH_MAX] = {
0,
};
- changelog_clnt_t *c_clnt = &priv->connections;
- changelog_rpc_clnt_t *crpc = NULL;
- int nofconn = 0;
/* sockfile path could have been saved to avoid this */
CHANGELOG_MAKE_SOCKET_PATH(priv->changelog_brick, sockfile, UNIX_PATH_MAX);
changelog_rpc_server_destroy(this, priv->rpc, sockfile,
changelog_rpcsvc_notify, changelog_programs);
-
- /* TODO Below approach is not perfect to wait for cleanup
- all active connections without this code brick process
- can be crash in case of brick multiplexing if any in-progress
- request process on rpc by changelog xlator after
- cleanup resources
- */
-
- if (c_clnt) {
- do {
- nofconn = 0;
- LOCK(&c_clnt->active_lock);
- list_for_each_entry(crpc, &c_clnt->active, list) { nofconn++; }
- UNLOCK(&c_clnt->active_lock);
- LOCK(&c_clnt->wait_lock);
- list_for_each_entry(crpc, &c_clnt->waitq, list) { nofconn++; }
- UNLOCK(&c_clnt->wait_lock);
- pthread_mutex_lock(&c_clnt->pending_lock);
- list_for_each_entry(crpc, &c_clnt->pending, list) { nofconn++; }
- pthread_mutex_unlock(&c_clnt->pending_lock);
-
- } while (nofconn); /* Wait for all connection cleanup */
- }
-
- (void)changelog_cleanup_rpc_threads(this, priv);
}
rpcsvc_t *
@@ -287,16 +379,15 @@ changelog_handle_probe(rpcsvc_request_t *req)
this = req->trans->xl;
if (this->cleanup_starting) {
- gf_msg(this->name, GF_LOG_DEBUG, 0, CHANGELOG_MSG_HANDLE_PROBE_ERROR,
- "cleanup_starting flag is already set for xl");
+ gf_smsg(this->name, GF_LOG_DEBUG, 0, CHANGELOG_MSG_CLEANUP_ALREADY_SET,
+ NULL);
return 0;
}
ret = xdr_to_generic(req->msg[0], &rpc_req,
(xdrproc_t)xdr_changelog_probe_req);
if (ret < 0) {
- gf_msg("", GF_LOG_ERROR, 0, CHANGELOG_MSG_HANDLE_PROBE_ERROR,
- "xdr decoding error");
+ gf_smsg("", GF_LOG_ERROR, 0, CHANGELOG_MSG_HANDLE_PROBE_ERROR, NULL);
req->rpc_err = GARBAGE_ARGS;
goto handle_xdr_error;
}
@@ -328,13 +419,13 @@ submit_rpc:
* RPC declarations
*/
-rpcsvc_actor_t changelog_svc_actors[CHANGELOG_RPC_PROC_MAX] = {
+static rpcsvc_actor_t changelog_svc_actors[CHANGELOG_RPC_PROC_MAX] = {
[CHANGELOG_RPC_PROBE_FILTER] = {"CHANGELOG PROBE FILTER",
- CHANGELOG_RPC_PROBE_FILTER,
- changelog_handle_probe, NULL, 0, DRC_NA},
+ changelog_handle_probe, NULL,
+ CHANGELOG_RPC_PROBE_FILTER, DRC_NA, 0},
};
-struct rpcsvc_program changelog_svc_prog = {
+static struct rpcsvc_program changelog_svc_prog = {
.progname = CHANGELOG_RPC_PROGNAME,
.prognum = CHANGELOG_RPC_PROGNUM,
.progver = CHANGELOG_RPC_PROGVER,
@@ -343,7 +434,7 @@ struct rpcsvc_program changelog_svc_prog = {
.synctask = _gf_true,
};
-struct rpcsvc_program *changelog_programs[] = {
+static struct rpcsvc_program *changelog_programs[] = {
&changelog_svc_prog,
NULL,
};
diff --git a/xlators/features/changelog/src/changelog-rpc.h b/xlators/features/changelog/src/changelog-rpc.h
index 8002cea5091..b1707565249 100644
--- a/xlators/features/changelog/src/changelog-rpc.h
+++ b/xlators/features/changelog/src/changelog-rpc.h
@@ -11,7 +11,7 @@
#ifndef __CHANGELOG_RPC_H
#define __CHANGELOG_RPC_H
-#include "xlator.h"
+#include <glusterfs/xlator.h>
#include "changelog-helpers.h"
/* one time */
diff --git a/xlators/features/changelog/src/changelog-rt.c b/xlators/features/changelog/src/changelog-rt.c
index 968c76b8b20..841545ae359 100644
--- a/xlators/features/changelog/src/changelog-rt.c
+++ b/xlators/features/changelog/src/changelog-rt.c
@@ -8,9 +8,9 @@
cases as published by the Free Software Foundation.
*/
-#include "xlator.h"
-#include "defaults.h"
-#include "logging.h"
+#include <glusterfs/xlator.h>
+#include <glusterfs/defaults.h>
+#include <glusterfs/logging.h>
#include "changelog-rt.h"
#include "changelog-mem-types.h"
diff --git a/xlators/features/changelog/src/changelog-rt.h b/xlators/features/changelog/src/changelog-rt.h
index df0d5b03487..28b9827d85b 100644
--- a/xlators/features/changelog/src/changelog-rt.h
+++ b/xlators/features/changelog/src/changelog-rt.h
@@ -11,8 +11,8 @@
#ifndef _CHANGELOG_RT_H
#define _CHANGELOG_RT_H
-#include "locking.h"
-#include "timer.h"
+#include <glusterfs/locking.h>
+#include <glusterfs/timer.h>
#include "pthread.h"
#include "changelog-helpers.h"
diff --git a/xlators/features/changelog/src/changelog.c b/xlators/features/changelog/src/changelog.c
index 35a523316ed..6a6e5af859e 100644
--- a/xlators/features/changelog/src/changelog.c
+++ b/xlators/features/changelog/src/changelog.c
@@ -8,11 +8,11 @@
cases as published by the Free Software Foundation.
*/
-#include "xlator.h"
-#include "defaults.h"
-#include "syscall.h"
-#include "logging.h"
-#include "iobuf.h"
+#include <glusterfs/xlator.h>
+#include <glusterfs/defaults.h>
+#include <glusterfs/syscall.h>
+#include <glusterfs/logging.h>
+#include <glusterfs/iobuf.h>
#include "changelog-rt.h"
@@ -34,6 +34,12 @@ static struct changelog_bootstrap cb_bootstrap[] = {
},
};
+static int
+changelog_init_rpc(xlator_t *this, changelog_priv_t *priv);
+
+static int
+changelog_init(xlator_t *this, changelog_priv_t *priv);
+
/* Entry operations - TYPE III */
/**
@@ -149,9 +155,8 @@ changelog_rmdir(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflags,
goto out;
}
if (barrier_enabled && !stub) {
- gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, CHANGELOG_MSG_NO_MEMORY,
- "Failed to barrier FOPs, disabling changelog barrier",
- "fop=rmdir", NULL);
+ gf_smsg(this->name, GF_LOG_ERROR, ENOMEM,
+ CHANGELOG_MSG_BARRIER_FOP_FAILED, "fop=rmdir", NULL);
chlog_barrier_dequeue_all(this, &queue);
}
@@ -298,9 +303,8 @@ changelog_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflags,
goto out;
}
if (barrier_enabled && !stub) {
- gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, CHANGELOG_MSG_NO_MEMORY,
- "Failed to barrier FOPs, disabling changelog barrier",
- "fop=unlink", NULL);
+ gf_smsg(this->name, GF_LOG_ERROR, ENOMEM,
+ CHANGELOG_MSG_BARRIER_FOP_FAILED, "fop=unlink", NULL);
chlog_barrier_dequeue_all(this, &queue);
}
@@ -418,9 +422,8 @@ changelog_rename(call_frame_t *frame, xlator_t *this, loc_t *oldloc,
goto out;
}
if (barrier_enabled && !stub) {
- gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, CHANGELOG_MSG_NO_MEMORY,
- "Failed to barrier FOPs, disabling changelog barrier",
- "fop=rename", NULL);
+ gf_smsg(this->name, GF_LOG_ERROR, ENOMEM,
+ CHANGELOG_MSG_BARRIER_FOP_FAILED, "fop=rename", NULL);
chlog_barrier_dequeue_all(this, &queue);
}
/* changelog barrier */
@@ -531,8 +534,7 @@ changelog_link(call_frame_t *frame, xlator_t *this, loc_t *oldloc,
}
if (barrier_enabled && !stub) {
- gf_smsg(this->name, GF_LOG_ERROR, 0, CHANGELOG_MSG_NO_MEMORY,
- "Failed to barrier FOPs, disabling changelog barrier",
+ gf_smsg(this->name, GF_LOG_ERROR, 0, CHANGELOG_MSG_BARRIER_FOP_FAILED,
"fop=link", NULL);
chlog_barrier_dequeue_all(this, &queue);
}
@@ -660,9 +662,8 @@ changelog_mkdir(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
}
if (barrier_enabled && !stub) {
- gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, CHANGELOG_MSG_NO_MEMORY,
- "Failed to barrier FOPs, disabling changelog barrier",
- "fop=mkdir", NULL);
+ gf_smsg(this->name, GF_LOG_ERROR, ENOMEM,
+ CHANGELOG_MSG_BARRIER_FOP_FAILED, "fop=mkdir", NULL);
chlog_barrier_dequeue_all(this, &queue);
}
@@ -782,9 +783,8 @@ changelog_symlink(call_frame_t *frame, xlator_t *this, const char *linkname,
}
if (barrier_enabled && !stub) {
- gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, CHANGELOG_MSG_NO_MEMORY,
- "Failed to barrier FOPs, disabling changelog barrier",
- "fop=symlink", NULL);
+ gf_smsg(this->name, GF_LOG_ERROR, ENOMEM,
+ CHANGELOG_MSG_BARRIER_FOP_FAILED, "fop=symlink", NULL);
chlog_barrier_dequeue_all(this, &queue);
}
@@ -929,9 +929,8 @@ changelog_mknod(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
}
if (barrier_enabled && !stub) {
- gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, CHANGELOG_MSG_NO_MEMORY,
- "Failed to barrier FOPs, disabling changelog barrier",
- "fop=mknod", NULL);
+ gf_smsg(this->name, GF_LOG_ERROR, ENOMEM,
+ CHANGELOG_MSG_BARRIER_FOP_FAILED, "fop=mknod", NULL);
chlog_barrier_dequeue_all(this, &queue);
}
@@ -972,8 +971,8 @@ changelog_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
CHANGELOG_OP_TYPE_RELEASE)) {
ret = fd_ctx_set(fd, this, (uint64_t)(long)0x1);
if (ret)
- gf_msg(this->name, GF_LOG_WARNING, 0, CHANGELOG_MSG_SET_FD_CONTEXT,
- "could not set fd context (for release cbk)");
+ gf_smsg(this->name, GF_LOG_WARNING, 0, CHANGELOG_MSG_SET_FD_CONTEXT,
+ NULL);
}
changelog_update(this, priv, local, CHANGELOG_TYPE_ENTRY);
@@ -1083,9 +1082,8 @@ changelog_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
}
if (barrier_enabled && !stub) {
- gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, CHANGELOG_MSG_NO_MEMORY,
- "Failed to barrier FOPs, disabling changelog barrier",
- "fop=create", NULL);
+ gf_smsg(this->name, GF_LOG_ERROR, ENOMEM,
+ CHANGELOG_MSG_BARRIER_FOP_FAILED, "fop=create", NULL);
chlog_barrier_dequeue_all(this, &queue);
}
@@ -1388,9 +1386,6 @@ changelog_handle_virtual_xattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
ret = changelog_fill_entry_buf(frame, this, loc, &local);
if (ret) {
gf_smsg(this->name, GF_LOG_INFO, 0, CHANGELOG_MSG_ENTRY_BUF_INFO,
- "Entry cannot be"
- " captured for gfid, Capturing DATA"
- " entry.",
"gfid=%s", uuid_utoa(loc->inode->gfid), NULL);
goto unwind;
}
@@ -1806,8 +1801,8 @@ changelog_open_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
CHANGELOG_OP_TYPE_RELEASE)) {
ret = fd_ctx_set(fd, this, (uint64_t)(long)0x1);
if (ret)
- gf_msg(this->name, GF_LOG_WARNING, 0, CHANGELOG_MSG_SET_FD_CONTEXT,
- "could not set fd context (for release cbk)");
+ gf_smsg(this->name, GF_LOG_WARNING, 0, CHANGELOG_MSG_SET_FD_CONTEXT,
+ NULL);
}
unwind:
@@ -2004,6 +1999,15 @@ notify(xlator_t *this, int event, void *data, ...)
struct list_head queue = {
0,
};
+ uint64_t xprtcnt = 0;
+ uint64_t clntcnt = 0;
+ changelog_clnt_t *conn = NULL;
+ gf_boolean_t cleanup_notify = _gf_false;
+ char sockfile[UNIX_PATH_MAX] = {
+ 0,
+ };
+ rpcsvc_listener_t *listener = NULL;
+ rpcsvc_listener_t *next = NULL;
INIT_LIST_HEAD(&queue);
@@ -2011,6 +2015,50 @@ notify(xlator_t *this, int event, void *data, ...)
if (!priv)
goto out;
+ if (event == GF_EVENT_PARENT_DOWN) {
+ priv->victim = data;
+ gf_log(this->name, GF_LOG_INFO,
+ "cleanup changelog rpc connection of brick %s",
+ priv->victim->name);
+
+ if (priv->rpc_active) {
+ this->cleanup_starting = 1;
+ changelog_destroy_rpc_listner(this, priv);
+ conn = &priv->connections;
+ if (conn)
+ changelog_ev_cleanup_connections(this, conn);
+ xprtcnt = GF_ATOMIC_GET(priv->xprtcnt);
+ clntcnt = GF_ATOMIC_GET(priv->clntcnt);
+ if (!xprtcnt && !clntcnt) {
+ LOCK(&priv->lock);
+ {
+ cleanup_notify = priv->notify_down;
+ priv->notify_down = _gf_true;
+ }
+ UNLOCK(&priv->lock);
+ if (priv->rpc) {
+ list_for_each_entry_safe(listener, next,
+ &priv->rpc->listeners, list)
+ {
+ if (listener->trans) {
+ rpc_transport_unref(listener->trans);
+ }
+ }
+ rpcsvc_destroy(priv->rpc);
+ priv->rpc = NULL;
+ }
+ CHANGELOG_MAKE_SOCKET_PATH(priv->changelog_brick, sockfile,
+ UNIX_PATH_MAX);
+ sys_unlink(sockfile);
+ if (!cleanup_notify)
+ default_notify(this, GF_EVENT_PARENT_DOWN, data);
+ }
+ } else {
+ default_notify(this, GF_EVENT_PARENT_DOWN, data);
+ }
+ goto out;
+ }
+
if (event == GF_EVENT_TRANSLATOR_OP) {
dict = data;
@@ -2018,15 +2066,15 @@ notify(xlator_t *this, int event, void *data, ...)
switch (barrier) {
case DICT_ERROR:
- gf_msg(this->name, GF_LOG_ERROR, 0,
- CHANGELOG_MSG_DICT_GET_FAILED,
- "Barrier dict_get_str_boolean failed");
+ gf_smsg(this->name, GF_LOG_ERROR, 0,
+ CHANGELOG_MSG_DICT_GET_FAILED, "dict_get_str_boolean",
+ NULL);
ret = -1;
goto out;
case BARRIER_OFF:
- gf_msg(this->name, GF_LOG_INFO, 0, CHANGELOG_MSG_BARRIER_INFO,
- "Barrier off notification");
+ gf_smsg(this->name, GF_LOG_INFO, 0,
+ CHANGELOG_MSG_BARRIER_STATE_NOTIFY, "off", NULL);
CHANGELOG_NOT_ON_THEN_GOTO(priv, ret, out);
LOCK(&priv->c_snap_lock);
@@ -2043,10 +2091,8 @@ notify(xlator_t *this, int event, void *data, ...)
UNLOCK(&priv->bflags.lock);
if (ret == -1) {
- gf_msg(this->name, GF_LOG_ERROR, 0,
- CHANGELOG_MSG_BARRIER_ERROR,
- "Received another barrier off"
- " notification while already off");
+ gf_smsg(this->name, GF_LOG_ERROR, 0,
+ CHANGELOG_MSG_BARRIER_ERROR, NULL);
goto out;
}
@@ -2064,13 +2110,11 @@ notify(xlator_t *this, int event, void *data, ...)
*/
if (ret == 0) {
chlog_barrier_dequeue_all(this, &queue);
- gf_msg(this->name, GF_LOG_INFO, 0,
- CHANGELOG_MSG_BARRIER_INFO,
- "Disabled changelog barrier");
+ gf_smsg(this->name, GF_LOG_INFO, 0,
+ CHANGELOG_MSG_BARRIER_DISABLED, NULL);
} else {
- gf_msg(this->name, GF_LOG_ERROR, 0,
- CHANGELOG_MSG_BARRIER_ERROR,
- "Changelog barrier already disabled");
+ gf_smsg(this->name, GF_LOG_ERROR, 0,
+ CHANGELOG_MSG_BARRIER_ALREADY_DISABLED, NULL);
}
LOCK(&priv->bflags.lock);
@@ -2082,8 +2126,8 @@ notify(xlator_t *this, int event, void *data, ...)
goto out;
case BARRIER_ON:
- gf_msg(this->name, GF_LOG_INFO, 0, CHANGELOG_MSG_BARRIER_INFO,
- "Barrier on notification");
+ gf_smsg(this->name, GF_LOG_INFO, 0,
+ CHANGELOG_MSG_BARRIER_STATE_NOTIFY, "on", NULL);
CHANGELOG_NOT_ON_THEN_GOTO(priv, ret, out);
LOCK(&priv->c_snap_lock);
@@ -2102,11 +2146,8 @@ notify(xlator_t *this, int event, void *data, ...)
UNLOCK(&priv->bflags.lock);
if (ret == -1) {
- gf_msg(this->name, GF_LOG_ERROR, 0,
- CHANGELOG_MSG_BARRIER_ERROR,
- "Received another barrier on"
- "notification when last one is"
- "not served yet");
+ gf_smsg(this->name, GF_LOG_ERROR, 0,
+ CHANGELOG_MSG_BARRIER_ON_ERROR, NULL);
goto out;
}
@@ -2129,14 +2170,14 @@ notify(xlator_t *this, int event, void *data, ...)
goto out;
}
- gf_msg(this->name, GF_LOG_INFO, 0, CHANGELOG_MSG_BARRIER_INFO,
- "Enabled changelog barrier");
+ gf_smsg(this->name, GF_LOG_INFO, 0,
+ CHANGELOG_MSG_BARRIER_ENABLE, NULL);
ret = changelog_barrier_notify(priv, buf);
if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0,
- CHANGELOG_MSG_WRITE_FAILED,
- "Explicit roll over: write failed");
+ gf_smsg(this->name, GF_LOG_ERROR, 0,
+ CHANGELOG_MSG_WRITE_FAILED, "Explicit roll over",
+ NULL);
changelog_barrier_cleanup(this, priv, &queue);
ret = -1;
goto out;
@@ -2160,21 +2201,20 @@ notify(xlator_t *this, int event, void *data, ...)
}
ret1 = pthread_mutex_unlock(&priv->bn.bnotify_mutex);
CHANGELOG_PTHREAD_ERROR_HANDLE_1(ret1, out, bclean_req);
- gf_msg(this->name, GF_LOG_INFO, 0, CHANGELOG_MSG_BNOTIFY_INFO,
- "Woke up: bnotify conditional wait");
+ gf_smsg(this->name, GF_LOG_INFO, 0,
+ CHANGELOG_MSG_BNOTIFY_COND_INFO, NULL);
goto out;
case DICT_DEFAULT:
- gf_msg(this->name, GF_LOG_ERROR, 0,
- CHANGELOG_MSG_DICT_GET_FAILED, "barrier key not found");
+ gf_smsg(this->name, GF_LOG_ERROR, 0,
+ CHANGELOG_MSG_BARRIER_KEY_NOT_FOUND, NULL);
ret = -1;
goto out;
default:
- gf_msg(this->name, GF_LOG_ERROR, EINVAL,
- CHANGELOG_MSG_DICT_GET_FAILED,
- "Something went bad in dict_get_str_boolean");
+ gf_smsg(this->name, GF_LOG_ERROR, EINVAL,
+ CHANGELOG_MSG_ERROR_IN_DICT_GET, NULL);
ret = -1;
goto out;
}
@@ -2200,9 +2240,8 @@ mem_acct_init(xlator_t *this)
ret = xlator_mem_acct_init(this, gf_changelog_mt_end + 1);
if (ret != 0) {
- gf_msg(this->name, GF_LOG_WARNING, ENOMEM, CHANGELOG_MSG_NO_MEMORY,
- "Memory accounting"
- " init failed");
+ gf_smsg(this->name, GF_LOG_WARNING, ENOMEM,
+ CHANGELOG_MSG_MEMORY_INIT_FAILED, NULL);
return ret;
}
@@ -2213,23 +2252,11 @@ static int
changelog_init(xlator_t *this, changelog_priv_t *priv)
{
int i = 0;
- int ret = -1;
- struct timeval tv = {
- 0,
- };
+ int ret = 0;
changelog_log_data_t cld = {
0,
};
- ret = gettimeofday(&tv, NULL);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, errno,
- CHANGELOG_MSG_GET_TIME_OP_FAILED, "gettimeofday() failure");
- goto out;
- }
-
- priv->slice.tv_start = tv;
-
priv->maps[CHANGELOG_TYPE_DATA] = "D ";
priv->maps[CHANGELOG_TYPE_METADATA] = "M ";
priv->maps[CHANGELOG_TYPE_METADATA_XATTR] = "M ";
@@ -2248,9 +2275,7 @@ changelog_init(xlator_t *this, changelog_priv_t *priv)
* in case there was an encoding change. so... things are kept
* simple here.
*/
- ret = changelog_fill_rollover_data(&cld, _gf_false);
- if (ret)
- goto out;
+ changelog_fill_rollover_data(&cld, _gf_false);
ret = htime_open(this, priv, cld.cld_roll_time);
/* call htime open with cld's rollover_time */
@@ -2288,8 +2313,8 @@ changelog_barrier_pthread_init(xlator_t *this, changelog_priv_t *priv)
if ((ret = pthread_mutex_init(&priv->bn.bnotify_mutex, NULL)) != 0) {
gf_smsg(this->name, GF_LOG_ERROR, errno,
- CHANGELOG_MSG_PTHREAD_MUTEX_INIT_FAILED,
- "bnotify pthread_mutex_init failed", "ret=%d", ret, NULL);
+ CHANGELOG_MSG_PTHREAD_MUTEX_INIT_FAILED, "name=bnotify",
+ "ret=%d", ret, NULL);
ret = -1;
goto out;
}
@@ -2297,8 +2322,8 @@ changelog_barrier_pthread_init(xlator_t *this, changelog_priv_t *priv)
if ((ret = pthread_cond_init(&priv->bn.bnotify_cond, NULL)) != 0) {
gf_smsg(this->name, GF_LOG_ERROR, errno,
- CHANGELOG_MSG_PTHREAD_COND_INIT_FAILED,
- "bnotify pthread_cond_init failed", "ret=%d", ret, NULL);
+ CHANGELOG_MSG_PTHREAD_COND_INIT_FAILED, "name=bnotify",
+ "ret=%d", ret, NULL);
ret = -1;
goto out;
}
@@ -2306,8 +2331,8 @@ changelog_barrier_pthread_init(xlator_t *this, changelog_priv_t *priv)
if ((ret = pthread_mutex_init(&priv->dm.drain_black_mutex, NULL)) != 0) {
gf_smsg(this->name, GF_LOG_ERROR, errno,
- CHANGELOG_MSG_PTHREAD_MUTEX_INIT_FAILED,
- "drain_black pthread_mutex_init failed", "ret=%d", ret, NULL);
+ CHANGELOG_MSG_PTHREAD_MUTEX_INIT_FAILED, "name=drain_black",
+ "ret=%d", ret, NULL);
ret = -1;
goto out;
}
@@ -2315,8 +2340,8 @@ changelog_barrier_pthread_init(xlator_t *this, changelog_priv_t *priv)
if ((ret = pthread_cond_init(&priv->dm.drain_black_cond, NULL)) != 0) {
gf_smsg(this->name, GF_LOG_ERROR, errno,
- CHANGELOG_MSG_PTHREAD_COND_INIT_FAILED,
- "drain_black pthread_cond_init failed", "ret=%d", ret, NULL);
+ CHANGELOG_MSG_PTHREAD_COND_INIT_FAILED, "name=drain_black",
+ "ret=%d", ret, NULL);
ret = -1;
goto out;
}
@@ -2324,8 +2349,8 @@ changelog_barrier_pthread_init(xlator_t *this, changelog_priv_t *priv)
if ((ret = pthread_mutex_init(&priv->dm.drain_white_mutex, NULL)) != 0) {
gf_smsg(this->name, GF_LOG_ERROR, errno,
- CHANGELOG_MSG_PTHREAD_MUTEX_INIT_FAILED,
- "drain_white pthread_mutex_init failed", "ret=%d", ret, NULL);
+ CHANGELOG_MSG_PTHREAD_MUTEX_INIT_FAILED, "name=drain_white",
+ "ret=%d", ret, NULL);
ret = -1;
goto out;
}
@@ -2333,8 +2358,8 @@ changelog_barrier_pthread_init(xlator_t *this, changelog_priv_t *priv)
if ((ret = pthread_cond_init(&priv->dm.drain_white_cond, NULL)) != 0) {
gf_smsg(this->name, GF_LOG_ERROR, errno,
- CHANGELOG_MSG_PTHREAD_COND_INIT_FAILED,
- "drain_white pthread_cond_init failed", "ret=%d", ret, NULL);
+ CHANGELOG_MSG_PTHREAD_COND_INIT_FAILED, "name=drain_white",
+ "ret=%d", ret, NULL);
ret = -1;
goto out;
}
@@ -2343,7 +2368,7 @@ changelog_barrier_pthread_init(xlator_t *this, changelog_priv_t *priv)
if ((pthread_mutex_init(&priv->cr.lock, NULL)) != 0) {
gf_smsg(this->name, GF_LOG_ERROR, errno,
CHANGELOG_MSG_PTHREAD_MUTEX_INIT_FAILED,
- "changelog_rollover lock init failed", "ret=%d", ret, NULL);
+ "name=changelog_rollover", "ret=%d", ret, NULL);
ret = -1;
goto out;
}
@@ -2394,6 +2419,22 @@ changelog_barrier_pthread_destroy(changelog_priv_t *priv)
LOCK_DESTROY(&priv->bflags.lock);
}
+static void
+changelog_cleanup_rpc(xlator_t *this, changelog_priv_t *priv)
+{
+ /* terminate rpc server */
+ if (!this->cleanup_starting)
+ changelog_destroy_rpc_listner(this, priv);
+
+ (void)changelog_cleanup_rpc_threads(this, priv);
+ /* cleanup rot buffs */
+ rbuf_dtor(priv->rbuf);
+
+ /* cleanup poller thread */
+ if (priv->poller)
+ (void)changelog_thread_cleanup(this, priv->poller);
+}
+
int
reconfigure(xlator_t *this, dict_t *options)
{
@@ -2402,6 +2443,9 @@ reconfigure(xlator_t *this, dict_t *options)
changelog_priv_t *priv = NULL;
gf_boolean_t active_earlier = _gf_true;
gf_boolean_t active_now = _gf_true;
+ gf_boolean_t rpc_active_earlier = _gf_true;
+ gf_boolean_t rpc_active_now = _gf_true;
+ gf_boolean_t iniate_rpc = _gf_false;
changelog_time_slice_t *slice = NULL;
changelog_log_data_t cld = {
0,
@@ -2412,9 +2456,6 @@ reconfigure(xlator_t *this, dict_t *options)
char csnap_dir[PATH_MAX] = {
0,
};
- struct timeval tv = {
- 0,
- };
uint32_t timeout = 0;
priv = this->private;
@@ -2423,14 +2464,15 @@ reconfigure(xlator_t *this, dict_t *options)
ret = -1;
active_earlier = priv->active;
+ rpc_active_earlier = priv->rpc_active;
/* first stop the rollover and the fsync thread */
changelog_cleanup_helper_threads(this, priv);
GF_OPTION_RECONF("changelog-dir", tmp, options, str, out);
if (!tmp) {
- gf_msg(this->name, GF_LOG_ERROR, 0, CHANGELOG_MSG_DIR_OPTIONS_NOT_SET,
- "\"changelog-dir\" option is not set");
+ gf_smsg(this->name, GF_LOG_ERROR, 0, CHANGELOG_MSG_DIR_OPTIONS_NOT_SET,
+ NULL);
goto out;
}
@@ -2456,6 +2498,29 @@ reconfigure(xlator_t *this, dict_t *options)
goto out;
GF_OPTION_RECONF("changelog", active_now, options, bool, out);
+ GF_OPTION_RECONF("changelog-notification", rpc_active_now, options, bool,
+ out);
+
+ /* If journalling is enabled, enable rpc notifications */
+ if (active_now && !active_earlier) {
+ if (!rpc_active_earlier)
+ iniate_rpc = _gf_true;
+ }
+
+ if (rpc_active_now && !rpc_active_earlier) {
+ iniate_rpc = _gf_true;
+ }
+
+ /* TODO: Disable of changelog-notifications is not supported for now
+ * as there is no clean way of cleaning up of rpc resources
+ */
+
+ if (iniate_rpc) {
+ ret = changelog_init_rpc(this, priv);
+ if (ret)
+ goto out;
+ priv->rpc_active = _gf_true;
+ }
/**
* changelog_handle_change() handles changes that could possibly
@@ -2482,9 +2547,7 @@ reconfigure(xlator_t *this, dict_t *options)
out);
if (active_now || active_earlier) {
- ret = changelog_fill_rollover_data(&cld, !active_now);
- if (ret)
- goto out;
+ changelog_fill_rollover_data(&cld, !active_now);
slice = &priv->slice;
@@ -2501,15 +2564,9 @@ reconfigure(xlator_t *this, dict_t *options)
if (active_now) {
if (!active_earlier) {
- gf_msg(this->name, GF_LOG_INFO, 0, CHANGELOG_MSG_HTIME_INFO,
- "Reconfigure: Changelog Enable");
- if (gettimeofday(&tv, NULL)) {
- gf_msg(this->name, GF_LOG_ERROR, 0,
- CHANGELOG_MSG_HTIME_ERROR, "unable to fetch htime");
- ret = -1;
- goto out;
- }
- htime_create(this, priv, tv.tv_sec);
+ gf_smsg(this->name, GF_LOG_INFO, 0, CHANGELOG_MSG_RECONFIGURE,
+ NULL);
+ htime_create(this, priv, gf_time());
}
ret = changelog_spawn_helper_threads(this, priv);
}
@@ -2534,8 +2591,7 @@ changelog_freeup_options(xlator_t *this, changelog_priv_t *priv)
ret = priv->cb->dtor(this, &priv->cd);
if (ret)
- gf_msg(this->name, GF_LOG_ERROR, 0, CHANGELOG_MSG_FREEUP_FAILED,
- "could not cleanup bootstrapper");
+ gf_smsg(this->name, GF_LOG_ERROR, 0, CHANGELOG_MSG_FREEUP_FAILED, NULL);
GF_FREE(priv->changelog_brick);
GF_FREE(priv->changelog_dir);
}
@@ -2587,6 +2643,7 @@ changelog_init_options(xlator_t *this, changelog_priv_t *priv)
goto dealloc_2;
GF_OPTION_INIT("changelog", priv->active, bool, dealloc_2);
+ GF_OPTION_INIT("changelog-notification", priv->rpc_active, bool, dealloc_2);
GF_OPTION_INIT("capture-del-path", priv->capture_del_path, bool, dealloc_2);
GF_OPTION_INIT("op-mode", tmp, str, dealloc_2);
@@ -2625,20 +2682,6 @@ error_return:
return -1;
}
-static void
-changelog_cleanup_rpc(xlator_t *this, changelog_priv_t *priv)
-{
- /* terminate rpc server */
- changelog_destroy_rpc_listner(this, priv);
-
- /* cleanup rot buffs */
- rbuf_dtor(priv->rbuf);
-
- /* cleanup poller thread */
- if (priv->poller)
- (void)changelog_thread_cleanup(this, priv->poller);
-}
-
static int
changelog_init_rpc(xlator_t *this, changelog_priv_t *priv)
{
@@ -2679,14 +2722,14 @@ init(xlator_t *this)
GF_VALIDATE_OR_GOTO("changelog", this, error_return);
if (!this->children || this->children->next) {
- gf_msg(this->name, GF_LOG_ERROR, 0, CHANGELOG_MSG_CHILD_MISCONFIGURED,
- "translator needs a single subvolume");
+ gf_smsg(this->name, GF_LOG_ERROR, 0, CHANGELOG_MSG_CHILD_MISCONFIGURED,
+ NULL);
goto error_return;
}
if (!this->parents) {
- gf_msg(this->name, GF_LOG_ERROR, 0, CHANGELOG_MSG_VOL_MISCONFIGURED,
- "dangling volume. please check volfile");
+ gf_smsg(this->name, GF_LOG_ERROR, 0, CHANGELOG_MSG_VOL_MISCONFIGURED,
+ NULL);
goto error_return;
}
@@ -2696,13 +2739,18 @@ init(xlator_t *this)
this->local_pool = mem_pool_new(changelog_local_t, 64);
if (!this->local_pool) {
- gf_msg(this->name, GF_LOG_ERROR, ENOMEM, CHANGELOG_MSG_NO_MEMORY,
- "failed to create local memory pool");
+ gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, CHANGELOG_MSG_NO_MEMORY,
+ NULL);
goto cleanup_priv;
}
LOCK_INIT(&priv->lock);
LOCK_INIT(&priv->c_snap_lock);
+ GF_ATOMIC_INIT(priv->listnercnt, 0);
+ GF_ATOMIC_INIT(priv->clntcnt, 0);
+ GF_ATOMIC_INIT(priv->xprtcnt, 0);
+ INIT_LIST_HEAD(&priv->xprt_list);
+ priv->htime_fd = -1;
ret = changelog_init_options(this, priv);
if (ret)
@@ -2730,10 +2778,13 @@ init(xlator_t *this)
INIT_LIST_HEAD(&priv->queue);
priv->barrier_enabled = _gf_false;
- /* RPC ball rolling.. */
- ret = changelog_init_rpc(this, priv);
- if (ret)
- goto cleanup_barrier;
+ if (priv->rpc_active || priv->active) {
+ /* RPC ball rolling.. */
+ ret = changelog_init_rpc(this, priv);
+ if (ret)
+ goto cleanup_barrier;
+ priv->rpc_active = _gf_true;
+ }
ret = changelog_init(this, priv);
if (ret)
@@ -2745,13 +2796,16 @@ init(xlator_t *this)
return 0;
cleanup_rpc:
- changelog_cleanup_rpc(this, priv);
+ if (priv->rpc_active) {
+ changelog_cleanup_rpc(this, priv);
+ }
cleanup_barrier:
changelog_barrier_pthread_destroy(priv);
cleanup_options:
changelog_freeup_options(this, priv);
cleanup_mempool:
mem_pool_destroy(this->local_pool);
+ this->local_pool = NULL;
cleanup_priv:
GF_FREE(priv);
error_return:
@@ -2770,9 +2824,11 @@ fini(xlator_t *this)
priv = this->private;
if (priv) {
- /* terminate RPC server/threads */
- changelog_cleanup_rpc(this, priv);
-
+ if (priv->active || priv->rpc_active) {
+ /* terminate RPC server/threads */
+ changelog_cleanup_rpc(this, priv);
+ GF_FREE(priv->ev_dispatcher);
+ }
/* call barrier_disable to cancel timer */
if (priv->barrier_enabled)
__chlog_barrier_disable(this, &queue);
@@ -2841,6 +2897,13 @@ struct volume_options options[] = {
.flags = OPT_FLAG_SETTABLE,
.level = OPT_STATUS_BASIC,
.tags = {"journal", "georep", "glusterfind"}},
+ {.key = {"changelog-notification"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ .description = "enable/disable changelog live notification",
+ .op_version = {3},
+ .level = OPT_STATUS_BASIC,
+ .tags = {"bitrot", "georep"}},
{.key = {"changelog-brick"},
.type = GF_OPTION_TYPE_PATH,
.description = "brick path to generate unique socket file name."
@@ -2910,3 +2973,17 @@ struct volume_options options[] = {
.tags = {"journal", "glusterfind"}},
{.key = {NULL}},
};
+
+xlator_api_t xlator_api = {
+ .init = init,
+ .fini = fini,
+ .notify = notify,
+ .reconfigure = reconfigure,
+ .mem_acct_init = mem_acct_init,
+ .op_version = {1}, /* Present from the initial version */
+ .fops = &fops,
+ .cbks = &cbks,
+ .options = options,
+ .identifier = "changelog",
+ .category = GF_MAINTAINED,
+};
diff --git a/xlators/features/changetimerecorder/Makefile.am b/xlators/features/changetimerecorder/Makefile.am
deleted file mode 100644
index a985f42a877..00000000000
--- a/xlators/features/changetimerecorder/Makefile.am
+++ /dev/null
@@ -1,3 +0,0 @@
-SUBDIRS = src
-
-CLEANFILES =
diff --git a/xlators/features/changetimerecorder/src/Makefile.am b/xlators/features/changetimerecorder/src/Makefile.am
deleted file mode 100644
index 620017e3309..00000000000
--- a/xlators/features/changetimerecorder/src/Makefile.am
+++ /dev/null
@@ -1,26 +0,0 @@
-xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features
-
-# changetimerecorder can only get build when libgfdb is enabled
-if BUILD_GFDB
- xlator_LTLIBRARIES = changetimerecorder.la
-endif
-
-changetimerecorder_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS)
-
-changetimerecorder_la_SOURCES = changetimerecorder.c \
- ctr-helper.c ctr-xlator-ctx.c
-
-changetimerecorder_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la\
- $(top_builddir)/libglusterfs/src/gfdb/libgfdb.la
-
-noinst_HEADERS = ctr-messages.h changetimerecorder.h ctr_mem_types.h \
- ctr-helper.h ctr-xlator-ctx.h
-
-AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \
- -I$(top_srcdir)/libglusterfs/src/gfdb \
- -I$(top_srcdir)/rpc/xdr/src -I$(top_builddir)/rpc/xdr/src \
- -DDATADIR=\"$(localstatedir)\"
-
-AM_CFLAGS = -Wall $(GF_CFLAGS) $(SQLITE_CFLAGS)
-
-CLEANFILES =
diff --git a/xlators/features/changetimerecorder/src/changetimerecorder.c b/xlators/features/changetimerecorder/src/changetimerecorder.c
deleted file mode 100644
index 3c63c43ee3d..00000000000
--- a/xlators/features/changetimerecorder/src/changetimerecorder.c
+++ /dev/null
@@ -1,2357 +0,0 @@
-/*
- Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-#include <ctype.h>
-#include <sys/uio.h>
-
-#include "gfdb_sqlite3.h"
-#include "ctr-helper.h"
-#include "ctr-messages.h"
-#include "syscall.h"
-
-#include "changetimerecorder.h"
-#include "tier-ctr-interface.h"
-
-/*******************************inode forget***********************************/
-int
-ctr_forget(xlator_t *this, inode_t *inode)
-{
- fini_ctr_xlator_ctx(this, inode);
- return 0;
-}
-
-/************************** Look up heal **************************************/
-/*
-Problem: The CTR xlator records file meta (heat/hardlinks)
-into the data. This works fine for files which are created
-after ctr xlator is switched ON. But for files which were
-created before CTR xlator is ON, CTR xlator is not able to
-record either of the meta i.e heat or hardlinks. Thus making
-those files immune to promotions/demotions.
-
-Solution: The solution that is implemented in this patch is
-do ctr-db heal of all those pre-existent files, using named lookup.
-For this purpose we use the inode-xlator context variable option
-in gluster.
-The inode-xlator context variable for ctr xlator will have the
-following,
- a. A Lock for the context variable
- b. A hardlink list: This list represents the successful looked
- up hardlinks.
-These are the scenarios when the hardlink list is updated:
-1) Named-Lookup: Whenever a named lookup happens on a file, in the
- wind path we copy all required hardlink and inode information to
- ctr_db_record structure, which resides in the frame->local variable.
- We don't update the database in wind. During the unwind, we read the
- information from the ctr_db_record and ,
- Check if the inode context variable is created, if not we create it.
- Check if the hard link is there in the hardlink list.
- If its not there we add it to the list and send a update to the
- database using libgfdb.
- Please note: The database transaction can fail(and we ignore) as there
- already might be a record in the db. This update to the db is to heal
- if its not there.
- If its there in the list we ignore it.
-2) Inode Forget: Whenever an inode forget hits we clear the hardlink list in
- the inode context variable and delete the inode context variable.
- Please note: An inode forget may happen for two reason,
- a. when the inode is delete.
- b. the in-memory inode is evicted from the inode table due to cache limits.
-3) create: whenever a create happens we create the inode context variable and
- add the hardlink. The database updation is done as usual by ctr.
-4) link: whenever a hardlink is created for the inode, we create the inode
- context variable, if not present, and add the hardlink to the list.
-5) unlink: whenever a unlink happens we delete the hardlink from the list.
-6) mknod: same as create.
-7) rename: whenever a rename happens we update the hardlink in list. if the
- hardlink was not present for updation, we add the hardlink to the list.
-
-What is pending:
-1) This solution will only work for named lookups.
-2) We don't track afr-self-heal/dht-rebalancer traffic for healing.
-
-*/
-
-/* This function does not write anything to the db,
- * just created the local variable
- * for the frame and sets values for the ctr_db_record */
-static int
-ctr_lookup_wind(call_frame_t *frame, xlator_t *this,
- gf_ctr_inode_context_t *ctr_inode_cx)
-{
- int ret = -1;
- gf_ctr_private_t *_priv = NULL;
- gf_ctr_local_t *ctr_local = NULL;
-
- GF_ASSERT(frame);
- GF_ASSERT(frame->root);
- GF_ASSERT(this);
- IS_CTR_INODE_CX_SANE(ctr_inode_cx);
-
- _priv = this->private;
- GF_ASSERT(_priv);
-
- if (_priv->ctr_record_wind && ctr_inode_cx->ia_type != IA_IFDIR) {
- frame->local = init_ctr_local_t(this);
- if (!frame->local) {
- gf_msg(this->name, GF_LOG_ERROR, 0,
- CTR_MSG_CREATE_CTR_LOCAL_ERROR_WIND,
- "WIND: Error while creating ctr local");
- goto out;
- };
- ctr_local = frame->local;
- /*Definitely no internal fops will reach here*/
- ctr_local->is_internal_fop = _gf_false;
- /*Don't record counters*/
- CTR_DB_REC(ctr_local).do_record_counters = _gf_false;
- /*Don't record time at all*/
- CTR_DB_REC(ctr_local).do_record_times = _gf_false;
-
- /* Copy gfid into db record*/
- gf_uuid_copy(CTR_DB_REC(ctr_local).gfid, *(ctr_inode_cx->gfid));
-
- /* Set fop_path and fop_type, required by libgfdb to make
- * decision while inserting the record */
- CTR_DB_REC(ctr_local).gfdb_fop_path = ctr_inode_cx->fop_path;
- CTR_DB_REC(ctr_local).gfdb_fop_type = ctr_inode_cx->fop_type;
-
- /* Copy hard link info*/
- gf_uuid_copy(CTR_DB_REC(ctr_local).pargfid,
- *((NEW_LINK_CX(ctr_inode_cx))->pargfid));
- if (snprintf(CTR_DB_REC(ctr_local).file_name,
- sizeof(CTR_DB_REC(ctr_local).file_name), "%s",
- NEW_LINK_CX(ctr_inode_cx)->basename) >=
- sizeof(CTR_DB_REC(ctr_local).file_name)) {
- gf_msg(this->name, GF_LOG_ERROR, 0,
- CTR_MSG_CREATE_CTR_LOCAL_ERROR_WIND,
- "WIND: Error copying filename of ctr local");
- goto out;
- }
- /* Since we are in lookup we can ignore errors while
- * Inserting in the DB, because there may be many
- * to write to the DB attempts for healing.
- * We don't want to log all failed attempts and
- * bloat the log*/
- ctr_local->gfdb_db_record.ignore_errors = _gf_true;
- }
-
- ret = 0;
-
-out:
-
- if (ret) {
- free_ctr_local(ctr_local);
- frame->local = NULL;
- }
-
- return ret;
-}
-
-/* This function inserts the ctr_db_record populated by ctr_lookup_wind
- * in to the db. It also destroys the frame->local created by ctr_lookup_wind */
-static int
-ctr_lookup_unwind(call_frame_t *frame, xlator_t *this)
-{
- int ret = -1;
- gf_ctr_private_t *_priv = NULL;
- gf_ctr_local_t *ctr_local = NULL;
-
- GF_ASSERT(frame);
- GF_ASSERT(this);
-
- _priv = this->private;
- GF_ASSERT(_priv);
-
- GF_ASSERT(_priv->_db_conn);
-
- ctr_local = frame->local;
-
- if (ctr_local && (ctr_local->ia_inode_type != IA_IFDIR)) {
- ret = insert_record(_priv->_db_conn, &ctr_local->gfdb_db_record);
- if (ret == -1) {
- gf_msg(this->name,
- _gfdb_log_level(GF_LOG_ERROR,
- ctr_local->gfdb_db_record.ignore_errors),
- 0, CTR_MSG_FILL_CTR_LOCAL_ERROR_UNWIND,
- "UNWIND: Error filling ctr local");
- goto out;
- }
- }
- ret = 0;
-out:
- free_ctr_local(ctr_local);
- frame->local = NULL;
- return ret;
-}
-
-/******************************************************************************
- *
- * FOPS HANDLING BELOW
- *
- * ***************************************************************************/
-
-/****************************LOOKUP********************************************/
-
-int32_t
-ctr_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct iatt *buf, dict_t *dict, struct iatt *postparent)
-{
- int ret = -1;
- ctr_xlator_ctx_t *ctr_xlator_ctx = NULL;
- gf_ctr_local_t *ctr_local = NULL;
- ctr_heal_ret_val_t ret_val = CTR_CTX_ERROR;
- gf_boolean_t _is_heal_needed = _gf_false;
-
- CTR_IS_DISABLED_THEN_GOTO(this, out);
-
- /* if the lookup failed lookup don't do anything*/
- if (op_ret == -1) {
- gf_msg_trace(this->name, 0, "lookup failed with %s",
- strerror(op_errno));
- goto out;
- }
-
- /* Ignore directory lookups */
- if (inode->ia_type == IA_IFDIR) {
- goto out;
- }
-
- /* if frame local was not set by the ctr_lookup()
- * so don't so anything*/
- if (!frame->local) {
- goto out;
- }
-
- /* if the lookup is for dht link donot record*/
- if (dht_is_linkfile(buf, dict)) {
- gf_msg_trace(this->name, 0,
- "Ignoring Lookup "
- "for dht link file");
- goto out;
- }
-
- ctr_local = frame->local;
- /*Assign the proper inode type*/
- ctr_local->ia_inode_type = inode->ia_type;
-
- /* Copy gfid directly from inode */
- gf_uuid_copy(CTR_DB_REC(ctr_local).gfid, inode->gfid);
-
- /* Checking if gfid and parent gfid is valid */
- if (gf_uuid_is_null(CTR_DB_REC(ctr_local).gfid) ||
- gf_uuid_is_null(CTR_DB_REC(ctr_local).pargfid)) {
- gf_msg_trace(this->name, 0, "Invalid GFID");
- goto out;
- }
-
- /* if its a first entry
- * then mark the ctr_record for create
- * A create will attempt a file and a hard link created in the db*/
- ctr_xlator_ctx = get_ctr_xlator_ctx(this, inode);
- if (!ctr_xlator_ctx) {
- /* This marks inode heal */
- CTR_DB_REC(ctr_local).gfdb_fop_type = GFDB_FOP_CREATE_WRITE;
- _is_heal_needed = _gf_true;
- }
-
- /* Copy the correct gfid from resolved inode */
- gf_uuid_copy(CTR_DB_REC(ctr_local).gfid, inode->gfid);
-
- /* Add hard link to the list */
- ret_val = add_hard_link_ctx(frame, this, inode);
- if (ret_val == CTR_CTX_ERROR) {
- gf_msg_trace(this->name, 0, "Failed adding hardlink to list");
- goto out;
- }
- /* If inode needs healing then heal the hardlink also */
- else if (ret_val & CTR_TRY_INODE_HEAL) {
- /* This marks inode heal */
- CTR_DB_REC(ctr_local).gfdb_fop_type = GFDB_FOP_CREATE_WRITE;
- _is_heal_needed = _gf_true;
- }
- /* If hardlink needs healing */
- else if (ret_val & CTR_TRY_HARDLINK_HEAL) {
- _is_heal_needed = _gf_true;
- }
-
- /* If lookup heal needed */
- if (!_is_heal_needed)
- goto out;
-
- /* FINALLY HEAL : Inserts the ctr_db_record populated by ctr_lookup_wind
- * in to the db. It also destroys the frame->local
- * created by ctr_lookup_wind */
- ret = ctr_lookup_unwind(frame, this);
- if (ret) {
- gf_msg_trace(this->name, 0, "Failed healing/inserting link");
- }
-
-out:
- free_ctr_local((gf_ctr_local_t *)frame->local);
- frame->local = NULL;
-
- STACK_UNWIND_STRICT(lookup, frame, op_ret, op_errno, inode, buf, dict,
- postparent);
-
- return 0;
-}
-
-int32_t
-ctr_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
-{
- gf_ctr_inode_context_t ctr_inode_cx;
- gf_ctr_inode_context_t *_inode_cx = &ctr_inode_cx;
- gf_ctr_link_context_t ctr_link_cx;
- gf_ctr_link_context_t *_link_cx = &ctr_link_cx;
- int ret = -1;
-
- CTR_IS_DISABLED_THEN_GOTO(this, out);
- CTR_IF_INTERNAL_FOP_THEN_GOTO(frame, xdata, out);
-
- GF_ASSERT(frame);
- GF_ASSERT(frame->root);
-
- /* Don't handle nameless lookups*/
- if (!loc->parent || !loc->name)
- goto out;
-
- /*fill ctr link context*/
- FILL_CTR_LINK_CX(_link_cx, loc->parent->gfid, loc->name, out);
-
- /* Fill ctr inode context*/
- /* IA_IFREG : We assume its a file in the wind
- * but in the unwind we are sure what the inode is a file
- * or directory
- * gfid: we are just filling loc->gfid which is not correct.
- * In unwind we fill the correct gfid for successful lookup*/
- FILL_CTR_INODE_CONTEXT(_inode_cx, IA_IFREG, loc->gfid, _link_cx, NULL,
- GFDB_FOP_DENTRY_WRITE, GFDB_FOP_WIND);
-
- /* Create the frame->local and populate ctr_db_record
- * No writing to the db yet */
- ret = ctr_lookup_wind(frame, this, _inode_cx);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_INSERT_LINK_WIND_FAILED,
- "Failed to insert link wind");
- }
-
-out:
- STACK_WIND(frame, ctr_lookup_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->lookup, loc, xdata);
- return 0;
-}
-
-/****************************WRITEV********************************************/
-int32_t
-ctr_writev_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf, dict_t *xdata)
-{
- int ret = -1;
-
- CTR_IS_DISABLED_THEN_GOTO(this, out);
- CTR_IF_FOP_FAILED_THEN_GOTO(this, op_ret, op_errno, out);
-
- ret = ctr_insert_unwind(frame, this, GFDB_FOP_INODE_WRITE, GFDB_FOP_UNWIND);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_INSERT_WRITEV_UNWIND_FAILED,
- "Failed to insert writev unwind");
- }
-
-out:
- ctr_free_frame_local(frame);
-
- STACK_UNWIND_STRICT(writev, frame, op_ret, op_errno, prebuf, postbuf,
- xdata);
-
- return 0;
-}
-
-int32_t
-ctr_writev(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *vector,
- int32_t count, off_t off, uint32_t flags, struct iobref *iobref,
- dict_t *xdata)
-{
- int ret = -1;
- gf_ctr_inode_context_t ctr_inode_cx;
- gf_ctr_inode_context_t *_inode_cx = &ctr_inode_cx;
-
- CTR_IS_DISABLED_THEN_GOTO(this, out);
- CTR_IF_INTERNAL_FOP_THEN_GOTO(frame, xdata, out);
-
- /*Fill ctr inode context*/
- FILL_CTR_INODE_CONTEXT(_inode_cx, fd->inode->ia_type, fd->inode->gfid, NULL,
- NULL, GFDB_FOP_INODE_WRITE, GFDB_FOP_WIND);
-
- /*record into the database*/
- ret = ctr_insert_wind(frame, this, _inode_cx);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_INSERT_WRITEV_WIND_FAILED,
- "Failed to insert writev wind");
- }
-
-out:
- STACK_WIND(frame, ctr_writev_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->writev, fd, vector, count, off, flags,
- iobref, xdata);
-
- return 0;
-}
-
-/******************************setattr*****************************************/
-
-int32_t
-ctr_setattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *preop_stbuf,
- struct iatt *postop_stbuf, dict_t *xdata)
-{
- int ret = -1;
-
- CTR_IS_DISABLED_THEN_GOTO(this, out);
- CTR_IF_FOP_FAILED_THEN_GOTO(this, op_ret, op_errno, out);
-
- ret = ctr_insert_unwind(frame, this, GFDB_FOP_INODE_WRITE, GFDB_FOP_UNWIND);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0,
- CTR_MSG_INSERT_SETATTR_UNWIND_FAILED,
- "Failed to insert setattr unwind");
- }
-
-out:
- ctr_free_frame_local(frame);
-
- STACK_UNWIND_STRICT(setattr, frame, op_ret, op_errno, preop_stbuf,
- postop_stbuf, xdata);
-
- return 0;
-}
-
-int32_t
-ctr_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc, struct iatt *stbuf,
- int32_t valid, dict_t *xdata)
-{
- int ret = -1;
- gf_ctr_inode_context_t ctr_inode_cx;
- gf_ctr_inode_context_t *_inode_cx = &ctr_inode_cx;
-
- CTR_IS_DISABLED_THEN_GOTO(this, out);
- CTR_IF_INTERNAL_FOP_THEN_GOTO(frame, xdata, out);
- CTR_RECORD_METADATA_HEAT_IS_DISABLED_THEN_GOTO(this, out);
-
- /*Fill ctr inode context*/
- FILL_CTR_INODE_CONTEXT(_inode_cx, loc->inode->ia_type, loc->inode->gfid,
- NULL, NULL, GFDB_FOP_INODE_WRITE, GFDB_FOP_WIND);
-
- /*record into the database*/
- ret = ctr_insert_wind(frame, this, _inode_cx);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_INSERT_SETATTR_WIND_FAILED,
- "Failed to insert setattr wind");
- }
-out:
-
- STACK_WIND(frame, ctr_setattr_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->setattr, loc, stbuf, valid, xdata);
-
- return 0;
-}
-
-/*************************** fsetattr ***************************************/
-int32_t
-ctr_fsetattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *preop_stbuf,
- struct iatt *postop_stbuf, dict_t *xdata)
-{
- int ret = -1;
-
- CTR_IS_DISABLED_THEN_GOTO(this, out);
- CTR_IF_FOP_FAILED_THEN_GOTO(this, op_ret, op_errno, out);
-
- ret = ctr_insert_unwind(frame, this, GFDB_FOP_INODE_WRITE, GFDB_FOP_UNWIND);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0,
- CTR_MSG_INSERT_SETATTR_UNWIND_FAILED,
- "Failed to insert fsetattr unwind");
- }
-
-out:
- ctr_free_frame_local(frame);
-
- STACK_UNWIND_STRICT(fsetattr, frame, op_ret, op_errno, preop_stbuf,
- postop_stbuf, xdata);
-
- return 0;
-}
-
-int32_t
-ctr_fsetattr(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iatt *stbuf,
- int32_t valid, dict_t *xdata)
-{
- int ret = -1;
- gf_ctr_inode_context_t ctr_inode_cx;
- gf_ctr_inode_context_t *_inode_cx = &ctr_inode_cx;
-
- CTR_IS_DISABLED_THEN_GOTO(this, out);
- CTR_IF_INTERNAL_FOP_THEN_GOTO(frame, xdata, out);
- CTR_RECORD_METADATA_HEAT_IS_DISABLED_THEN_GOTO(this, out);
-
- /*Fill ctr inode context*/
- FILL_CTR_INODE_CONTEXT(_inode_cx, fd->inode->ia_type, fd->inode->gfid, NULL,
- NULL, GFDB_FOP_INODE_WRITE, GFDB_FOP_WIND);
-
- /*record into the database*/
- ret = ctr_insert_wind(frame, this, _inode_cx);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_INSERT_SETATTR_WIND_FAILED,
- "Failed to insert fsetattr wind");
- }
-out:
- STACK_WIND(frame, ctr_fsetattr_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fsetattr, fd, stbuf, valid, xdata);
-
- return 0;
-}
-/****************************fremovexattr************************************/
-
-int32_t
-ctr_fremovexattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
-{
- int ret = -1;
-
- CTR_IS_DISABLED_THEN_GOTO(this, out);
- CTR_IF_FOP_FAILED_THEN_GOTO(this, op_ret, op_errno, out);
-
- ret = ctr_insert_unwind(frame, this, GFDB_FOP_INODE_WRITE, GFDB_FOP_UNWIND);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0,
- CTR_MSG_INSERT_FREMOVEXATTR_UNWIND_FAILED,
- "Failed to insert fremovexattr unwind");
- }
-
-out:
- ctr_free_frame_local(frame);
-
- STACK_UNWIND_STRICT(fremovexattr, frame, op_ret, op_errno, xdata);
-
- return 0;
-}
-
-int32_t
-ctr_fremovexattr(call_frame_t *frame, xlator_t *this, fd_t *fd,
- const char *name, dict_t *xdata)
-{
- int ret = -1;
- gf_ctr_inode_context_t ctr_inode_cx;
- gf_ctr_inode_context_t *_inode_cx = &ctr_inode_cx;
-
- CTR_IS_DISABLED_THEN_GOTO(this, out);
- CTR_IF_INTERNAL_FOP_THEN_GOTO(frame, xdata, out);
- CTR_RECORD_METADATA_HEAT_IS_DISABLED_THEN_GOTO(this, out);
-
- /*Fill ctr inode context*/
- FILL_CTR_INODE_CONTEXT(_inode_cx, fd->inode->ia_type, fd->inode->gfid, NULL,
- NULL, GFDB_FOP_INODE_WRITE, GFDB_FOP_WIND);
-
- /*record into the database*/
- ret = ctr_insert_wind(frame, this, _inode_cx);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0,
- CTR_MSG_INSERT_FREMOVEXATTR_WIND_FAILED,
- "Failed to insert fremovexattr wind");
- }
-
-out:
- STACK_WIND(frame, ctr_fremovexattr_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fremovexattr, fd, name, xdata);
- return 0;
-}
-
-/****************************removexattr*************************************/
-
-int32_t
-ctr_removexattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
-{
- int ret = -1;
-
- CTR_IS_DISABLED_THEN_GOTO(this, out);
- CTR_IF_FOP_FAILED_THEN_GOTO(this, op_ret, op_errno, out);
- CTR_IF_INTERNAL_FOP_THEN_GOTO(frame, xdata, out);
-
- ret = ctr_insert_unwind(frame, this, GFDB_FOP_INODE_WRITE, GFDB_FOP_UNWIND);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0,
- CTR_MSG_INSERT_REMOVEXATTR_UNWIND_FAILED,
- "Failed to insert removexattr unwind");
- }
-
-out:
- ctr_free_frame_local(frame);
-
- STACK_UNWIND_STRICT(removexattr, frame, op_ret, op_errno, xdata);
-
- return 0;
-}
-
-int32_t
-ctr_removexattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
- const char *name, dict_t *xdata)
-{
- int ret = -1;
- gf_ctr_inode_context_t ctr_inode_cx;
- gf_ctr_inode_context_t *_inode_cx = &ctr_inode_cx;
-
- CTR_IS_DISABLED_THEN_GOTO(this, out);
- CTR_IF_INTERNAL_FOP_THEN_GOTO(frame, xdata, out);
- CTR_RECORD_METADATA_HEAT_IS_DISABLED_THEN_GOTO(this, out);
-
- /*Fill ctr inode context*/
- FILL_CTR_INODE_CONTEXT(_inode_cx, loc->inode->ia_type, loc->inode->gfid,
- NULL, NULL, GFDB_FOP_INODE_WRITE, GFDB_FOP_WIND);
-
- /*record into the database*/
- ret = ctr_insert_wind(frame, this, _inode_cx);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0,
- CTR_MSG_INSERT_REMOVEXATTR_WIND_FAILED,
- "Failed to insert removexattr wind");
- }
-
-out:
- STACK_WIND(frame, ctr_removexattr_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->removexattr, loc, name, xdata);
- return 0;
-}
-
-/****************************truncate****************************************/
-
-int32_t
-ctr_truncate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf, dict_t *xdata)
-{
- int ret = -1;
-
- CTR_IS_DISABLED_THEN_GOTO(this, out);
- CTR_IF_FOP_FAILED_THEN_GOTO(this, op_ret, op_errno, out);
-
- ret = ctr_insert_unwind(frame, this, GFDB_FOP_INODE_WRITE, GFDB_FOP_UNWIND);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0,
- CTR_MSG_INSERT_TRUNCATE_UNWIND_FAILED,
- "Failed to insert truncate unwind");
- }
-
-out:
- ctr_free_frame_local(frame);
-
- STACK_UNWIND_STRICT(truncate, frame, op_ret, op_errno, prebuf, postbuf,
- xdata);
-
- return 0;
-}
-
-int32_t
-ctr_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset,
- dict_t *xdata)
-{
- int ret = -1;
- gf_ctr_inode_context_t ctr_inode_cx;
- gf_ctr_inode_context_t *_inode_cx = &ctr_inode_cx;
-
- CTR_IS_DISABLED_THEN_GOTO(this, out);
- CTR_IF_INTERNAL_FOP_THEN_GOTO(frame, xdata, out);
-
- /*Fill ctr inode context*/
- FILL_CTR_INODE_CONTEXT(_inode_cx, loc->inode->ia_type, loc->inode->gfid,
- NULL, NULL, GFDB_FOP_INODE_WRITE, GFDB_FOP_WIND);
-
- /*record into the database*/
- ret = ctr_insert_wind(frame, this, _inode_cx);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_INSERT_TRUNCATE_WIND_FAILED,
- "Failed to insert truncate wind");
- }
-out:
- STACK_WIND(frame, ctr_truncate_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->truncate, loc, offset, xdata);
- return 0;
-}
-
-/****************************ftruncate***************************************/
-
-int32_t
-ctr_ftruncate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf, dict_t *xdata)
-{
- int ret = -1;
-
- CTR_IS_DISABLED_THEN_GOTO(this, out);
- CTR_IF_FOP_FAILED_THEN_GOTO(this, op_ret, op_errno, out);
-
- ret = ctr_insert_unwind(frame, this, GFDB_FOP_INODE_WRITE, GFDB_FOP_UNWIND);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0,
- CTR_MSG_INSERT_FTRUNCATE_UNWIND_FAILED,
- "Failed to insert ftruncate unwind");
- }
-
-out:
- ctr_free_frame_local(frame);
-
- STACK_UNWIND_STRICT(ftruncate, frame, op_ret, op_errno, prebuf, postbuf,
- xdata);
-
- return 0;
-}
-
-int32_t
-ctr_ftruncate(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
- dict_t *xdata)
-{
- int ret = -1;
- gf_ctr_inode_context_t ctr_inode_cx;
- gf_ctr_inode_context_t *_inode_cx = &ctr_inode_cx;
-
- CTR_IS_DISABLED_THEN_GOTO(this, out);
- CTR_IF_INTERNAL_FOP_THEN_GOTO(frame, xdata, out);
-
- /*Fill ctr inode context*/
- FILL_CTR_INODE_CONTEXT(_inode_cx, fd->inode->ia_type, fd->inode->gfid, NULL,
- NULL, GFDB_FOP_INODE_WRITE, GFDB_FOP_WIND);
-
- /*record into the database*/
- ret = ctr_insert_wind(frame, this, _inode_cx);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0,
- CTR_MSG_INSERT_FTRUNCATE_WIND_FAILED,
- "Failed to insert ftruncate wind");
- }
-
-out:
- STACK_WIND(frame, ctr_ftruncate_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->ftruncate, fd, offset, xdata);
- return 0;
-}
-
-/****************************rename******************************************/
-int32_t
-ctr_rename_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *buf,
- struct iatt *preoldparent, struct iatt *postoldparent,
- struct iatt *prenewparent, struct iatt *postnewparent,
- dict_t *xdata)
-{
- int ret = -1;
- uint32_t remaining_links = -1;
- gf_ctr_local_t *ctr_local = NULL;
- gfdb_fop_type_t fop_type = GFDB_FOP_INVALID_OP;
- gfdb_fop_path_t fop_path = GFDB_FOP_INVALID;
-
- GF_ASSERT(frame);
- GF_ASSERT(this);
-
- CTR_IS_DISABLED_THEN_GOTO(this, out);
- CTR_IF_FOP_FAILED_THEN_GOTO(this, op_ret, op_errno, out);
-
- ret = ctr_insert_unwind(frame, this, GFDB_FOP_DENTRY_WRITE,
- GFDB_FOP_UNWIND);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_INSERT_RENAME_UNWIND_FAILED,
- "Failed to insert rename unwind");
- goto out;
- }
-
- if (!xdata)
- goto out;
- /*
- *
- * Extracting GF_RESPONSE_LINK_COUNT_XDATA from POSIX Xlator
- * This is only set when we are overwriting hardlinks.
- *
- * */
- ret = dict_get_uint32(xdata, GF_RESPONSE_LINK_COUNT_XDATA,
- &remaining_links);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0,
- CTR_MSG_GET_CTR_RESPONSE_LINK_COUNT_XDATA_FAILED,
- "Failed to getting GF_RESPONSE_LINK_COUNT_XDATA");
- remaining_links = -1;
- goto out;
- }
-
- ctr_local = frame->local;
- if (!ctr_local) {
- gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_NULL_LOCAL,
- "ctr_local is NULL.");
- goto out;
- }
-
- /* This is not the only link */
- if (remaining_links > 1) {
- fop_type = GFDB_FOP_DENTRY_WRITE;
- fop_path = GFDB_FOP_UNDEL;
- }
- /* Last link that was deleted */
- else if (remaining_links == 1) {
- fop_type = GFDB_FOP_DENTRY_WRITE;
- fop_path = GFDB_FOP_UNDEL_ALL;
- } else {
- gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_INSERT_RENAME_UNWIND_FAILED,
- "Invalid link count from posix");
- goto out;
- }
-
- ret = ctr_delete_hard_link_from_db(
- this, CTR_DB_REC(ctr_local).old_gfid, CTR_DB_REC(ctr_local).pargfid,
- CTR_DB_REC(ctr_local).file_name, fop_type, fop_path);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_INSERT_UNLINK_UNWIND_FAILED,
- "Failed to delete records of %s",
- CTR_DB_REC(ctr_local).old_file_name);
- }
-
-out:
- ctr_free_frame_local(frame);
-
- STACK_UNWIND_STRICT(rename, frame, op_ret, op_errno, buf, preoldparent,
- postoldparent, prenewparent, postnewparent, xdata);
-
- return 0;
-}
-
-int32_t
-ctr_rename(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
- dict_t *xdata)
-{
- int ret = -1;
- gf_ctr_inode_context_t ctr_inode_cx;
- gf_ctr_inode_context_t *_inode_cx = &ctr_inode_cx;
- gf_ctr_link_context_t new_link_cx, old_link_cx;
- gf_ctr_link_context_t *_nlink_cx = &new_link_cx;
- gf_ctr_link_context_t *_olink_cx = &old_link_cx;
- int is_dict_created = 0;
- ctr_xlator_ctx_t *ctr_xlator_ctx = NULL;
-
- CTR_IS_DISABLED_THEN_GOTO(this, out);
- CTR_IF_INTERNAL_FOP_THEN_GOTO(frame, xdata, out);
-
- /*Fill old link context*/
- FILL_CTR_LINK_CX(_olink_cx, oldloc->pargfid, oldloc->name, out);
-
- /*Fill new link context*/
- FILL_CTR_LINK_CX(_nlink_cx, newloc->pargfid, newloc->name, out);
-
- /*Fill ctr inode context*/
- FILL_CTR_INODE_CONTEXT(_inode_cx, oldloc->inode->ia_type,
- oldloc->inode->gfid, _nlink_cx, _olink_cx,
- GFDB_FOP_DENTRY_WRITE, GFDB_FOP_WIND);
-
- /* If the rename is a overwrite of hardlink
- * rename ("file1", "file2")
- * file1 is hardlink for gfid say 00000000-0000-0000-0000-00000000000A
- * file2 is hardlink for gfid say 00000000-0000-0000-0000-00000000000B
- * so we are saving file2 gfid in old_gfid so that we delete entries
- * from the db during rename callback if the fop is successful
- * */
- if (newloc->inode) {
- /* This is the GFID from where the newloc hardlink will be
- * unlinked */
- _inode_cx->old_gfid = &newloc->inode->gfid;
- }
-
- /* Is a metatdata fop */
- _inode_cx->is_metadata_fop = _gf_true;
-
- /*record into the database*/
- ret = ctr_insert_wind(frame, this, _inode_cx);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_INSERT_RENAME_WIND_FAILED,
- "Failed to insert rename wind");
- } else {
- /* We are doing updation of hard link in inode context in wind
- * As we don't get the "inode" in the call back for rename */
- ret = update_hard_link_ctx(frame, this, oldloc->inode);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_UPDATE_HARDLINK_FAILED,
- "Failed "
- "updating hard link in ctr inode context");
- goto out;
- }
-
- /* If the newloc has an inode. i.e acquiring hardlink of an
- * exisitng file i.e overwritting a file.
- * */
- if (newloc->inode) {
- /* Getting the ctr inode context variable for
- * inode whose hardlink will be acquired during
- * the rename
- * */
- ctr_xlator_ctx = get_ctr_xlator_ctx(this, newloc->inode);
- if (!ctr_xlator_ctx) {
- /* Since there is no ctr inode context
- * so nothing more to do */
- ret = 0;
- goto out;
- }
-
- /* Deleting hardlink from context variable */
- ret = ctr_delete_hard_link(this, ctr_xlator_ctx, newloc->pargfid,
- newloc->name);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0,
- CTR_MSG_DELETE_HARDLINK_FAILED,
- "Failed to delete hard link");
- goto out;
- }
-
- /* Requesting for number of hardlinks on the newloc
- * inode from POSIX.
- * */
- is_dict_created = set_posix_link_request(this, &xdata);
- if (is_dict_created == -1) {
- ret = -1;
- goto out;
- }
- }
- }
-
-out:
- STACK_WIND(frame, ctr_rename_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->rename, oldloc, newloc, xdata);
-
- if (is_dict_created == 1) {
- dict_unref(xdata);
- }
-
- return 0;
-}
-
-/****************************unlink******************************************/
-int32_t
-ctr_unlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
-{
- int ret = -1;
- uint32_t remaining_links = -1;
-
- CTR_IS_DISABLED_THEN_GOTO(this, out);
- CTR_IF_FOP_FAILED_THEN_GOTO(this, op_ret, op_errno, out);
-
- if (!xdata)
- goto out;
-
- /*
- *
- * Extracting GF_RESPONSE_LINK_COUNT_XDATA from POSIX Xlator
- *
- * */
- ret = dict_get_uint32(xdata, GF_RESPONSE_LINK_COUNT_XDATA,
- &remaining_links);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0,
- CTR_MSG_GET_CTR_RESPONSE_LINK_COUNT_XDATA_FAILED,
- "Failed to getting GF_RESPONSE_LINK_COUNT_XDATA");
- remaining_links = -1;
- }
-
- /*This is not the only link*/
- if (remaining_links != 1) {
- ret = ctr_insert_unwind(frame, this, GFDB_FOP_DENTRY_WRITE,
- GFDB_FOP_UNDEL);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0,
- CTR_MSG_INSERT_UNLINK_UNWIND_FAILED,
- "Failed to insert unlink unwind");
- }
- }
- /*Last link that was deleted*/
- else if (remaining_links == 1) {
- ret = ctr_insert_unwind(frame, this, GFDB_FOP_DENTRY_WRITE,
- GFDB_FOP_UNDEL_ALL);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0,
- CTR_MSG_INSERT_UNLINK_UNWIND_FAILED,
- "Failed to insert unlink unwind");
- }
- }
-
-out:
- ctr_free_frame_local(frame);
-
- STACK_UNWIND_STRICT(unlink, frame, op_ret, op_errno, preparent, postparent,
- xdata);
-
- return 0;
-}
-
-int32_t
-ctr_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag,
- dict_t *xdata)
-{
- int ret = -1;
- gf_ctr_inode_context_t ctr_inode_cx;
- gf_ctr_inode_context_t *_inode_cx = &ctr_inode_cx;
- gf_ctr_link_context_t ctr_link_cx;
- gf_ctr_link_context_t *_link_cx = &ctr_link_cx;
- gf_boolean_t is_xdata_created = _gf_false;
- struct iatt dummy_stat = {0};
-
- GF_ASSERT(frame);
-
- CTR_IS_DISABLED_THEN_GOTO(this, out);
-
- /*Fill link context*/
- FILL_CTR_LINK_CX(_link_cx, loc->pargfid, loc->name, out);
-
- /*Fill ctr inode context*/
- FILL_CTR_INODE_CONTEXT(_inode_cx, loc->inode->ia_type, loc->inode->gfid,
- _link_cx, NULL, GFDB_FOP_DENTRY_WRITE,
- GFDB_FOP_WDEL);
-
- /*Internal FOP*/
- _inode_cx->is_internal_fop = is_internal_fop(frame, xdata);
-
- /* Is a metadata FOP */
- _inode_cx->is_metadata_fop = _gf_true;
-
- /* If its a internal FOP and dht link file donot record*/
- if (_inode_cx->is_internal_fop && dht_is_linkfile(&dummy_stat, xdata)) {
- goto out;
- }
-
- /*record into the database*/
- ret = ctr_insert_wind(frame, this, _inode_cx);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_INSERT_UNLINK_UNWIND_FAILED,
- "Failed to insert unlink wind");
- } else {
- /* We are doing delete of hard link in inode context in wind
- * As we don't get the "inode" in the call back for rename */
- ret = delete_hard_link_ctx(frame, this, loc->inode);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_DELETE_HARDLINK_FAILED,
- "Failed "
- "deleting hard link from ctr inode context");
- }
- }
-
- /*
- *
- * Sending GF_REQUEST_LINK_COUNT_XDATA
- * to POSIX Xlator to send link count in unwind path
- *
- * */
- /*create xdata if NULL*/
- if (!xdata) {
- xdata = dict_new();
- is_xdata_created = (xdata) ? _gf_true : _gf_false;
- }
- if (!xdata) {
- gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_XDATA_NULL,
- "xdata is NULL :Cannot send "
- "GF_REQUEST_LINK_COUNT_XDATA to posix");
- goto out;
- }
-
- ret = dict_set_int32(xdata, GF_REQUEST_LINK_COUNT_XDATA, 1);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0,
- CTR_MSG_SET_CTR_RESPONSE_LINK_COUNT_XDATA_FAILED,
- "Failed setting GF_REQUEST_LINK_COUNT_XDATA");
- if (is_xdata_created) {
- dict_unref(xdata);
- }
- goto out;
- }
-
-out:
- STACK_WIND(frame, ctr_unlink_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->unlink, loc, xflag, xdata);
-
- if (is_xdata_created)
- dict_unref(xdata);
-
- return 0;
-}
-
-/****************************fsync******************************************/
-int32_t
-ctr_fsync_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
- int32_t op_errno, struct iatt *prebuf, struct iatt *postbuf,
- dict_t *xdata)
-{
- int ret = -1;
-
- CTR_IS_DISABLED_THEN_GOTO(this, out);
- CTR_IF_FOP_FAILED_THEN_GOTO(this, op_ret, op_errno, out);
-
- ret = ctr_insert_unwind(frame, this, GFDB_FOP_INODE_WRITE, GFDB_FOP_UNWIND);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_INSERT_FSYNC_UNWIND_FAILED,
- "Failed to insert fsync unwind");
- }
-
-out:
- ctr_free_frame_local(frame);
-
- STACK_UNWIND_STRICT(fsync, frame, op_ret, op_errno, prebuf, postbuf, xdata);
-
- return 0;
-}
-
-int32_t
-ctr_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags,
- dict_t *xdata)
-{
- int ret = -1;
- gf_ctr_inode_context_t ctr_inode_cx;
- gf_ctr_inode_context_t *_inode_cx = &ctr_inode_cx;
-
- CTR_IS_DISABLED_THEN_GOTO(this, out);
- CTR_IF_INTERNAL_FOP_THEN_GOTO(frame, xdata, out);
-
- /*Fill ctr inode context*/
- FILL_CTR_INODE_CONTEXT(_inode_cx, fd->inode->ia_type, fd->inode->gfid, NULL,
- NULL, GFDB_FOP_INODE_WRITE, GFDB_FOP_WIND);
-
- /*record into the database*/
- ret = ctr_insert_wind(frame, this, _inode_cx);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_INSERT_FSYNC_WIND_FAILED,
- "Failed to insert fsync wind");
- }
-
-out:
- STACK_WIND(frame, ctr_fsync_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fsync, fd, flags, xdata);
- return 0;
-}
-
-/****************************setxattr****************************************/
-
-int
-ctr_setxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
-{
- int ret = -1;
-
- CTR_IS_DISABLED_THEN_GOTO(this, out);
-
- ret = ctr_insert_unwind(frame, this, GFDB_FOP_INODE_WRITE, GFDB_FOP_UNWIND);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_INSERT_FSYNC_UNWIND_FAILED,
- "Failed to insert setxattr unwind");
- }
-
-out:
- ctr_free_frame_local(frame);
-
- STACK_UNWIND_STRICT(setxattr, frame, op_ret, op_errno, xdata);
-
- return 0;
-}
-
-int
-ctr_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr,
- int flags, dict_t *xdata)
-{
- int ret = -1;
- gf_ctr_inode_context_t ctr_inode_cx;
- gf_ctr_inode_context_t *_inode_cx = &ctr_inode_cx;
-
- CTR_IS_DISABLED_THEN_GOTO(this, out);
- CTR_IF_INTERNAL_FOP_THEN_GOTO(frame, xdata, out);
- CTR_RECORD_METADATA_HEAT_IS_DISABLED_THEN_GOTO(this, out);
-
- /*Fill ctr inode context*/
- FILL_CTR_INODE_CONTEXT(_inode_cx, loc->inode->ia_type, loc->inode->gfid,
- NULL, NULL, GFDB_FOP_INODE_WRITE, GFDB_FOP_WIND);
-
- /*record into the database*/
- ret = ctr_insert_wind(frame, this, _inode_cx);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_INSERT_SETATTR_WIND_FAILED,
- "Failed to insert setxattr wind");
- }
-
-out:
- STACK_WIND(frame, ctr_setxattr_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->setxattr, loc, xattr, flags, xdata);
- return 0;
-}
-/**************************** fsetxattr *************************************/
-int32_t
-ctr_fsetxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
-{
- int ret = -1;
-
- CTR_IS_DISABLED_THEN_GOTO(this, out);
- CTR_IF_FOP_FAILED_THEN_GOTO(this, op_ret, op_errno, out);
-
- ret = ctr_insert_unwind(frame, this, GFDB_FOP_INODE_WRITE, GFDB_FOP_UNWIND);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_INSERT_FSYNC_UNWIND_FAILED,
- "Failed to insert fsetxattr unwind");
- }
-
-out:
- ctr_free_frame_local(frame);
-
- STACK_UNWIND_STRICT(fsetxattr, frame, op_ret, op_errno, xdata);
-
- return 0;
-}
-
-int32_t
-ctr_fsetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict,
- int32_t flags, dict_t *xdata)
-{
- int ret = -1;
- gf_ctr_inode_context_t ctr_inode_cx;
- gf_ctr_inode_context_t *_inode_cx = &ctr_inode_cx;
-
- CTR_IS_DISABLED_THEN_GOTO(this, out);
- CTR_IF_INTERNAL_FOP_THEN_GOTO(frame, xdata, out);
- CTR_RECORD_METADATA_HEAT_IS_DISABLED_THEN_GOTO(this, out);
-
- /*Fill ctr inode context*/
- FILL_CTR_INODE_CONTEXT(_inode_cx, fd->inode->ia_type, fd->inode->gfid, NULL,
- NULL, GFDB_FOP_INODE_WRITE, GFDB_FOP_WIND);
-
- /*record into the database*/
- ret = ctr_insert_wind(frame, this, _inode_cx);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_INSERT_SETATTR_WIND_FAILED,
- "Failed to insert fsetxattr wind");
- }
-
-out:
- STACK_WIND(frame, ctr_fsetxattr_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fsetxattr, fd, dict, flags, xdata);
- return 0;
-}
-/****************************mknod*******************************************/
-
-int32_t
-ctr_mknod_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
- int32_t op_errno, inode_t *inode, struct iatt *buf,
- struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
-{
- int ret = -1;
- ctr_heal_ret_val_t ret_val = CTR_CTX_ERROR;
-
- CTR_IS_DISABLED_THEN_GOTO(this, out);
- CTR_IF_FOP_FAILED_THEN_GOTO(this, op_ret, op_errno, out);
-
- /* Add hard link to the list */
- ret_val = add_hard_link_ctx(frame, this, inode);
- if (ret_val == CTR_CTX_ERROR) {
- gf_msg_trace(this->name, 0, "Failed adding hard link");
- }
-
- ret = ctr_insert_unwind(frame, this, GFDB_FOP_CREATE_WRITE,
- GFDB_FOP_UNWIND);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_INSERT_MKNOD_UNWIND_FAILED,
- "Failed to insert mknod unwind");
- }
-
-out:
- ctr_free_frame_local(frame);
-
- STACK_UNWIND_STRICT(mknod, frame, op_ret, op_errno, inode, buf, preparent,
- postparent, xdata);
-
- return 0;
-}
-
-int
-ctr_mknod(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
- dev_t rdev, mode_t umask, dict_t *xdata)
-{
- int ret = -1;
- gf_ctr_inode_context_t ctr_inode_cx;
- gf_ctr_inode_context_t *_inode_cx = &ctr_inode_cx;
- gf_ctr_link_context_t ctr_link_cx;
- gf_ctr_link_context_t *_link_cx = &ctr_link_cx;
- uuid_t gfid = {
- 0,
- };
- uuid_t *ptr_gfid = &gfid;
-
- CTR_IS_DISABLED_THEN_GOTO(this, out);
- CTR_IF_INTERNAL_FOP_THEN_GOTO(frame, xdata, out);
-
- GF_ASSERT(frame);
- GF_ASSERT(frame->root);
-
- /*get gfid from xdata dict*/
- ret = dict_get_gfuuid(xdata, "gfid-req", &gfid);
- if (ret) {
- gf_msg_debug(this->name, 0, "failed to get gfid from dict");
- goto out;
- }
-
- /*fill ctr link context*/
- FILL_CTR_LINK_CX(_link_cx, loc->pargfid, loc->name, out);
-
- /*Fill ctr inode context*/
- FILL_CTR_INODE_CONTEXT(_inode_cx, loc->inode->ia_type, *ptr_gfid, _link_cx,
- NULL, GFDB_FOP_CREATE_WRITE, GFDB_FOP_WIND);
-
- /*record into the database*/
- ret = ctr_insert_wind(frame, this, _inode_cx);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_INSERT_MKNOD_WIND_FAILED,
- "Failed to insert mknod wind");
- }
-
-out:
- STACK_WIND(frame, ctr_mknod_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->mknod, loc, mode, rdev, umask, xdata);
- return 0;
-}
-
-/****************************create******************************************/
-int
-ctr_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
- int op_errno, fd_t *fd, inode_t *inode, struct iatt *stbuf,
- struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
-{
- int ret = -1;
-
- CTR_IS_DISABLED_THEN_GOTO(this, out);
- CTR_IF_FOP_FAILED_THEN_GOTO(this, op_ret, op_errno, out);
-
- ret = add_hard_link_ctx(frame, this, inode);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_ADD_HARDLINK_FAILED,
- "Failed adding hard link");
- }
-
- ret = ctr_insert_unwind(frame, this, GFDB_FOP_CREATE_WRITE,
- GFDB_FOP_UNWIND);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_INSERT_CREATE_UNWIND_FAILED,
- "Failed to insert create unwind");
- }
-
-out:
- ctr_free_frame_local(frame);
-
- STACK_UNWIND_STRICT(create, frame, op_ret, op_errno, fd, inode, stbuf,
- preparent, postparent, xdata);
-
- return 0;
-}
-
-int
-ctr_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
- mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata)
-{
- int ret = -1;
- gf_ctr_inode_context_t ctr_inode_cx;
- gf_ctr_inode_context_t *_inode_cx = &ctr_inode_cx;
- gf_ctr_link_context_t ctr_link_cx;
- gf_ctr_link_context_t *_link_cx = &ctr_link_cx;
- uuid_t gfid = {
- 0,
- };
- uuid_t *ptr_gfid = &gfid;
- struct iatt dummy_stat = {0};
-
- CTR_IS_DISABLED_THEN_GOTO(this, out);
-
- GF_ASSERT(frame);
- GF_ASSERT(frame->root);
-
- /*Get GFID from Xdata dict*/
- ret = dict_get_gfuuid(xdata, "gfid-req", &gfid);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_GET_GFID_FROM_DICT_FAILED,
- "failed to get gfid from dict");
- goto out;
- }
-
- /*fill ctr link context*/
- FILL_CTR_LINK_CX(_link_cx, loc->pargfid, loc->name, out);
-
- /*Fill ctr inode context*/
- FILL_CTR_INODE_CONTEXT(_inode_cx, loc->inode->ia_type, *ptr_gfid, _link_cx,
- NULL, GFDB_FOP_CREATE_WRITE, GFDB_FOP_WIND);
-
- /*Internal FOP*/
- _inode_cx->is_internal_fop = is_internal_fop(frame, xdata);
-
- /* If its a internal FOP and dht link file donot record*/
- if (_inode_cx->is_internal_fop && dht_is_linkfile(&dummy_stat, xdata)) {
- goto out;
- }
-
- /*record into the database*/
- ret = ctr_insert_wind(frame, this, &ctr_inode_cx);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_INSERT_CREATE_WIND_FAILED,
- "Failed to insert create wind");
- }
-out:
- STACK_WIND(frame, ctr_create_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->create, loc, flags, mode, umask, fd,
- xdata);
- return 0;
-}
-
-/****************************link********************************************/
-
-int
-ctr_link_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
- int op_errno, inode_t *inode, struct iatt *stbuf,
- struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
-{
- int ret = -1;
-
- CTR_IS_DISABLED_THEN_GOTO(this, out);
- CTR_IF_FOP_FAILED_THEN_GOTO(this, op_ret, op_errno, out);
-
- /* Add hard link to the list */
- ret = add_hard_link_ctx(frame, this, inode);
- if (ret) {
- gf_msg_trace(this->name, 0, "Failed adding hard link");
- }
-
- ret = ctr_insert_unwind(frame, this, GFDB_FOP_DENTRY_WRITE,
- GFDB_FOP_UNWIND);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_INSERT_CREATE_UNWIND_FAILED,
- "Failed to insert create unwind");
- }
-
-out:
- ctr_free_frame_local(frame);
-
- STACK_UNWIND_STRICT(link, frame, op_ret, op_errno, inode, stbuf, preparent,
- postparent, xdata);
- return 0;
-}
-
-int
-ctr_link(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
- dict_t *xdata)
-{
- int ret = -1;
- gf_ctr_inode_context_t ctr_inode_cx;
- gf_ctr_inode_context_t *_inode_cx = &ctr_inode_cx;
- gf_ctr_link_context_t ctr_link_cx;
- gf_ctr_link_context_t *_link_cx = &ctr_link_cx;
- struct iatt dummy_stat = {0};
-
- CTR_IS_DISABLED_THEN_GOTO(this, out);
-
- GF_ASSERT(frame);
- GF_ASSERT(frame->root);
-
- /*fill ctr link context*/
- FILL_CTR_LINK_CX(_link_cx, newloc->pargfid, newloc->name, out);
-
- /*Fill ctr inode context*/
- FILL_CTR_INODE_CONTEXT(_inode_cx, oldloc->inode->ia_type,
- oldloc->inode->gfid, _link_cx, NULL,
- GFDB_FOP_DENTRY_WRITE, GFDB_FOP_WIND);
-
- /*Internal FOP*/
- _inode_cx->is_internal_fop = is_internal_fop(frame, xdata);
-
- /* Is a metadata fop */
- _inode_cx->is_metadata_fop = _gf_true;
-
- /* If its a internal FOP and dht link file donot record*/
- if (_inode_cx->is_internal_fop && dht_is_linkfile(&dummy_stat, xdata)) {
- goto out;
- }
-
- /*record into the database*/
- ret = ctr_insert_wind(frame, this, _inode_cx);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_INSERT_LINK_WIND_FAILED,
- "Failed to insert link wind");
- }
-
-out:
- STACK_WIND(frame, ctr_link_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->link, oldloc, newloc, xdata);
- return 0;
-}
-
-/******************************readv*****************************************/
-int
-ctr_readv_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
- int op_errno, struct iovec *vector, int count, struct iatt *stbuf,
- struct iobref *iobref, dict_t *xdata)
-{
- int ret = -1;
-
- CTR_IS_DISABLED_THEN_GOTO(this, out);
- CTR_IF_FOP_FAILED_THEN_GOTO(this, op_ret, op_errno, out);
-
- ret = ctr_insert_unwind(frame, this, GFDB_FOP_INODE_READ, GFDB_FOP_UNWIND);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_INSERT_CREATE_UNWIND_FAILED,
- "Failed to insert create unwind");
- }
-
-out:
- ctr_free_frame_local(frame);
-
- STACK_UNWIND_STRICT(readv, frame, op_ret, op_errno, vector, count, stbuf,
- iobref, xdata);
- return 0;
-}
-
-int
-ctr_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, off_t off,
- uint32_t flags, dict_t *xdata)
-{
- int ret = -1;
- gf_ctr_inode_context_t ctr_inode_cx;
- gf_ctr_inode_context_t *_inode_cx = &ctr_inode_cx;
-
- CTR_IS_DISABLED_THEN_GOTO(this, out);
- CTR_IF_INTERNAL_FOP_THEN_GOTO(frame, xdata, out);
-
- /*Fill ctr inode context*/
- FILL_CTR_INODE_CONTEXT(_inode_cx, fd->inode->ia_type, fd->inode->gfid, NULL,
- NULL, GFDB_FOP_INODE_READ, GFDB_FOP_WIND);
-
- /*record into the database*/
- ret = ctr_insert_wind(frame, this, _inode_cx);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_INSERT_READV_WIND_FAILED,
- "Failed to insert readv wind");
- }
-
-out:
- STACK_WIND(frame, ctr_readv_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->readv, fd, size, off, flags, xdata);
- return 0;
-}
-
-/*******************************ctr_ipc****************************************/
-
-/*This is the call back function per record/file from data base*/
-static int
-ctr_db_query_callback(gfdb_query_record_t *gfdb_query_record, void *args)
-{
- int ret = -1;
- ctr_query_cbk_args_t *query_cbk_args = args;
-
- GF_VALIDATE_OR_GOTO("ctr", query_cbk_args, out);
-
- ret = gfdb_write_query_record(query_cbk_args->query_fd, gfdb_query_record);
- if (ret) {
- gf_msg("ctr", GF_LOG_ERROR, 0, CTR_MSG_FATAL_ERROR,
- "Failed to write to query file");
- goto out;
- }
-
- query_cbk_args->count++;
-
- ret = 0;
-out:
- return ret;
-}
-
-/* This function does all the db queries related to tiering and
- * generates/populates new/existing query file
- * inputs:
- * xlator_t *this : CTR Translator
- * void *conn_node : Database connection
- * char *query_file: the query file that needs to be updated
- * gfdb_ipc_ctr_params_t *ipc_ctr_params: the query parameters
- * Return:
- * On success 0
- * On failure -1
- * */
-int
-ctr_db_query(xlator_t *this, void *conn_node, char *query_file,
- gfdb_ipc_ctr_params_t *ipc_ctr_params)
-{
- int ret = -1;
- ctr_query_cbk_args_t query_cbk_args = {0};
-
- GF_VALIDATE_OR_GOTO("ctr", this, out);
- GF_VALIDATE_OR_GOTO(this->name, conn_node, out);
- GF_VALIDATE_OR_GOTO(this->name, query_file, out);
- GF_VALIDATE_OR_GOTO(this->name, ipc_ctr_params, out);
-
- /*Query for eligible files from db*/
- query_cbk_args.query_fd = open(query_file, O_WRONLY | O_CREAT | O_APPEND,
- S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH);
- if (query_cbk_args.query_fd < 0) {
- gf_msg(this->name, GF_LOG_ERROR, errno, CTR_MSG_FATAL_ERROR,
- "Failed to open query file %s", query_file);
- goto out;
- }
- if (!ipc_ctr_params->is_promote) {
- if (ipc_ctr_params->emergency_demote) {
- /* emergency demotion mode */
- ret = find_all(conn_node, ctr_db_query_callback,
- (void *)&query_cbk_args,
- ipc_ctr_params->query_limit);
- } else {
- if (ipc_ctr_params->write_freq_threshold == 0 &&
- ipc_ctr_params->read_freq_threshold == 0) {
- ret = find_unchanged_for_time(conn_node, ctr_db_query_callback,
- (void *)&query_cbk_args,
- &ipc_ctr_params->time_stamp);
- } else {
- ret = find_unchanged_for_time_freq(
- conn_node, ctr_db_query_callback, (void *)&query_cbk_args,
- &ipc_ctr_params->time_stamp,
- ipc_ctr_params->write_freq_threshold,
- ipc_ctr_params->read_freq_threshold, _gf_false);
- }
- }
- } else {
- if (ipc_ctr_params->write_freq_threshold == 0 &&
- ipc_ctr_params->read_freq_threshold == 0) {
- ret = find_recently_changed_files(conn_node, ctr_db_query_callback,
- (void *)&query_cbk_args,
- &ipc_ctr_params->time_stamp);
- } else {
- ret = find_recently_changed_files_freq(
- conn_node, ctr_db_query_callback, (void *)&query_cbk_args,
- &ipc_ctr_params->time_stamp,
- ipc_ctr_params->write_freq_threshold,
- ipc_ctr_params->read_freq_threshold, _gf_false);
- }
- }
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_FATAL_ERROR,
- "FATAL: query from db failed");
- goto out;
- }
-
- ret = clear_files_heat(conn_node);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_FATAL_ERROR,
- "FATAL: Failed to clear db entries");
- goto out;
- }
-
- ret = 0;
-out:
-
- if (!ret)
- ret = query_cbk_args.count;
-
- if (query_cbk_args.query_fd >= 0) {
- sys_close(query_cbk_args.query_fd);
- query_cbk_args.query_fd = -1;
- }
-
- return ret;
-}
-
-void *
-ctr_compact_thread(void *args)
-{
- int ret = -1;
- void *db_conn = NULL;
-
- xlator_t *this = NULL;
- gf_ctr_private_t *priv = NULL;
- gf_boolean_t compact_active = _gf_false;
- gf_boolean_t compact_mode_switched = _gf_false;
-
- this = (xlator_t *)args;
-
- GF_VALIDATE_OR_GOTO("ctr", this, out);
-
- priv = this->private;
-
- db_conn = priv->_db_conn;
- compact_active = priv->compact_active;
- compact_mode_switched = priv->compact_mode_switched;
-
- gf_msg("ctr-compact", GF_LOG_INFO, 0, CTR_MSG_SET, "Starting compaction");
-
- ret = compact_db(db_conn, compact_active, compact_mode_switched);
-
- if (ret) {
- gf_msg("ctr-compact", GF_LOG_ERROR, 0, CTR_MSG_SET,
- "Failed to perform the compaction");
- }
-
- ret = pthread_mutex_lock(&priv->compact_lock);
-
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_SET,
- "Failed to acquire lock");
- goto out;
- }
-
- /* We are done compaction on this brick. Set all flags to false */
- priv->compact_active = _gf_false;
- priv->compact_mode_switched = _gf_false;
-
- ret = pthread_mutex_unlock(&priv->compact_lock);
-
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_SET,
- "Failed to release lock");
- goto out;
- }
-
-out:
- return NULL;
-}
-
-int
-ctr_ipc_helper(xlator_t *this, dict_t *in_dict, dict_t *out_dict)
-{
- int ret = -1;
- char *ctr_ipc_ops = NULL;
- gf_ctr_private_t *priv = NULL;
- char *db_version = NULL;
- char *db_param_key = NULL;
- char *db_param = NULL;
- char *query_file = NULL;
- gfdb_ipc_ctr_params_t *ipc_ctr_params = NULL;
- int result = 0;
- pthread_t compact_thread;
-
- GF_VALIDATE_OR_GOTO("ctr", this, out);
- GF_VALIDATE_OR_GOTO(this->name, this->private, out);
- priv = this->private;
- GF_VALIDATE_OR_GOTO(this->name, priv->_db_conn, out);
- GF_VALIDATE_OR_GOTO(this->name, in_dict, out);
- GF_VALIDATE_OR_GOTO(this->name, out_dict, out);
-
- GET_DB_PARAM_FROM_DICT(this->name, in_dict, GFDB_IPC_CTR_KEY, ctr_ipc_ops,
- out);
-
- /*if its a db clear operation */
- if (strncmp(ctr_ipc_ops, GFDB_IPC_CTR_CLEAR_OPS,
- SLEN(GFDB_IPC_CTR_CLEAR_OPS)) == 0) {
- ret = clear_files_heat(priv->_db_conn);
- if (ret)
- goto out;
-
- } /* if its a query operation, in which case its query + clear db*/
- else if (strncmp(ctr_ipc_ops, GFDB_IPC_CTR_QUERY_OPS,
- SLEN(GFDB_IPC_CTR_QUERY_OPS)) == 0) {
- ret = dict_get_str(in_dict, GFDB_IPC_CTR_GET_QFILE_PATH, &query_file);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_SET,
- "Failed extracting query file path");
- goto out;
- }
-
- ret = dict_get_bin(in_dict, GFDB_IPC_CTR_GET_QUERY_PARAMS,
- (void *)&ipc_ctr_params);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_SET,
- "Failed extracting query parameters");
- goto out;
- }
-
- ret = ctr_db_query(this, priv->_db_conn, query_file, ipc_ctr_params);
-
- ret = dict_set_int32(out_dict, GFDB_IPC_CTR_RET_QUERY_COUNT, ret);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_SET,
- "Failed setting query reply");
- goto out;
- }
-
- } /* if its a query for db version */
- else if (strncmp(ctr_ipc_ops, GFDB_IPC_CTR_GET_DB_VERSION_OPS,
- SLEN(GFDB_IPC_CTR_GET_DB_VERSION_OPS)) == 0) {
- ret = get_db_version(priv->_db_conn, &db_version);
- if (ret == -1 || !db_version) {
- gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_SET,
- "Failed extracting db version ");
- goto out;
- }
-
- SET_DB_PARAM_TO_DICT(this->name, out_dict, GFDB_IPC_CTR_RET_DB_VERSION,
- db_version, ret, error);
-
- } /* if its a query for a db setting */
- else if (strncmp(ctr_ipc_ops, GFDB_IPC_CTR_GET_DB_PARAM_OPS,
- SLEN(GFDB_IPC_CTR_GET_DB_PARAM_OPS)) == 0) {
- ret = dict_get_str(in_dict, GFDB_IPC_CTR_GET_DB_KEY, &db_param_key);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_SET,
- "Failed extracting db param key");
- goto out;
- }
-
- ret = get_db_params(priv->_db_conn, db_param_key, &db_param);
- if (ret == -1 || !db_param) {
- goto out;
- }
-
- SET_DB_PARAM_TO_DICT(this->name, out_dict, db_param_key, db_param, ret,
- error);
- } /* if its an attempt to compact the database */
- else if (strncmp(ctr_ipc_ops, GFDB_IPC_CTR_SET_COMPACT_PRAGMA,
- SLEN(GFDB_IPC_CTR_SET_COMPACT_PRAGMA)) == 0) {
- ret = pthread_mutex_lock(&priv->compact_lock);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_SET,
- "Failed to acquire lock for compaction");
- goto out;
- }
-
- if ((priv->compact_active || priv->compact_mode_switched)) {
- /* Compaction in progress. LEAVE */
- gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_SET,
- "Compaction already in progress.");
- pthread_mutex_unlock(&priv->compact_lock);
- goto out;
- }
- /* At this point, we should be the only one on the brick */
- /* compacting */
-
- /* Grab the arguments from the dictionary */
- ret = dict_get_int32(in_dict, "compact_active", &result);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_SET,
- "Failed to get compaction type");
- goto out;
- }
-
- if (result) {
- priv->compact_active = _gf_true;
- }
-
- ret = dict_get_int32(in_dict, "compact_mode_switched", &result);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_SET,
- "Failed to see if compaction switched");
- goto out;
- }
-
- if (result) {
- priv->compact_mode_switched = _gf_true;
- gf_msg("ctr-compact", GF_LOG_TRACE, 0, CTR_MSG_SET,
- "Pre-thread: Compact mode switch is true");
- } else {
- gf_msg("ctr-compact", GF_LOG_TRACE, 0, CTR_MSG_SET,
- "Pre-thread: Compact mode switch is false");
- }
-
- ret = pthread_mutex_unlock(&priv->compact_lock);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_SET,
- "Failed to release lock for compaction");
- goto out;
- }
-
- ret = gf_thread_create(&compact_thread, NULL, ctr_compact_thread,
- (void *)this, "ctrcomp");
-
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_SET,
- "Failed to spawn compaction thread");
- goto out;
- }
-
- goto out;
- } /* default case */
- else {
- goto out;
- }
-
- ret = 0;
- goto out;
-error:
- GF_FREE(db_param_key);
- GF_FREE(db_param);
- GF_FREE(db_version);
-out:
- return ret;
-}
-
-/* IPC Call from tier migrator to clear the heat on the DB */
-int32_t
-ctr_ipc(call_frame_t *frame, xlator_t *this, int32_t op, dict_t *in_dict)
-{
- int ret = -1;
- gf_ctr_private_t *priv = NULL;
- dict_t *out_dict = NULL;
-
- GF_ASSERT(this);
- priv = this->private;
- GF_ASSERT(priv);
- GF_ASSERT(priv->_db_conn);
- GF_VALIDATE_OR_GOTO(this->name, in_dict, wind);
-
- if (op != GF_IPC_TARGET_CTR)
- goto wind;
-
- out_dict = dict_new();
- if (!out_dict) {
- goto out;
- }
-
- ret = ctr_ipc_helper(this, in_dict, out_dict);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_SET,
- "Failed in ctr_ipc_helper");
- }
-out:
-
- STACK_UNWIND_STRICT(ipc, frame, ret, 0, out_dict);
-
- if (out_dict)
- dict_unref(out_dict);
-
- return 0;
-
-wind:
- STACK_WIND(frame, default_ipc_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->ipc, op, in_dict);
-
- return 0;
-}
-
-/* Call to initialize db for ctr xlator while ctr is enabled */
-int32_t
-initialize_ctr_resource(xlator_t *this, gf_ctr_private_t *priv)
-{
- int ret_db = -1;
- dict_t *params_dict = NULL;
-
- if (!priv)
- goto error;
-
- /* For compaction */
- priv->compact_active = _gf_false;
- priv->compact_mode_switched = _gf_false;
- ret_db = pthread_mutex_init(&priv->compact_lock, NULL);
-
- if (ret_db) {
- gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_FATAL_ERROR,
- "FATAL: Failed initializing compaction mutex");
- goto error;
- }
-
- params_dict = dict_new();
- if (!params_dict) {
- gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_INIT_DB_PARAMS_FAILED,
- "DB Params cannot initialized!");
- goto error;
- }
-
- /*Extract db params options*/
- ret_db = extract_db_params(this, params_dict, priv->gfdb_db_type);
- if (ret_db) {
- gf_msg(this->name, GF_LOG_ERROR, 0,
- CTR_MSG_EXTRACT_DB_PARAM_OPTIONS_FAILED,
- "Failed extracting db params options");
- goto error;
- }
-
- /*Create a memory pool for ctr xlator*/
- this->local_pool = mem_pool_new(gf_ctr_local_t, 64);
- if (!this->local_pool) {
- gf_msg(this->name, GF_LOG_ERROR, 0,
- CTR_MSG_CREATE_LOCAL_MEMORY_POOL_FAILED,
- "failed to create local memory pool");
- goto error;
- }
-
- /*Initialize Database Connection*/
- priv->_db_conn = init_db(params_dict, priv->gfdb_db_type);
- if (!priv->_db_conn) {
- gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_FATAL_ERROR,
- "FATAL: Failed initializing data base");
- goto error;
- }
-
- ret_db = 0;
- goto out;
-
-error:
- if (this)
- mem_pool_destroy(this->local_pool);
-
- if (priv) {
- GF_FREE(priv->ctr_db_path);
- }
- GF_FREE(priv);
- ret_db = -1;
-out:
- if (params_dict)
- dict_unref(params_dict);
-
- return ret_db;
-}
-
-/******************************************************************************/
-int
-reconfigure(xlator_t *this, dict_t *options)
-{
- char *temp_str = NULL;
- int ret = 0;
- gf_ctr_private_t *priv = NULL;
-
- priv = this->private;
-
- if (dict_get_str(options, "changetimerecorder.frequency", &temp_str)) {
- gf_msg(this->name, GF_LOG_TRACE, 0, CTR_MSG_SET, "set");
- }
-
- GF_OPTION_RECONF("ctr-enabled", priv->enabled, options, bool, out);
- if (!priv->enabled) {
- gf_msg(GFDB_DATA_STORE, GF_LOG_INFO, 0, CTR_MSG_XLATOR_DISABLED,
- "CTR Xlator is not enabled so skip ctr reconfigure");
- goto out;
- }
-
- /* If ctr is enabled after skip init for ctr xlator then call
- initialize_ctr_resource during reconfigure phase to allocate resources
- for xlator
- */
- if (priv->enabled && !priv->_db_conn) {
- ret = initialize_ctr_resource(this, priv);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_FATAL_ERROR,
- "FATAL: Failed ctr initialize resource");
- goto out;
- }
- }
-
- GF_OPTION_RECONF("record-counters", priv->ctr_record_counter, options, bool,
- out);
-
- GF_OPTION_RECONF("ctr-record-metadata-heat", priv->ctr_record_metadata_heat,
- options, bool, out);
-
- GF_OPTION_RECONF("ctr_link_consistency", priv->ctr_link_consistency,
- options, bool, out);
-
- GF_OPTION_RECONF("ctr_lookupheal_inode_timeout",
- priv->ctr_lookupheal_inode_timeout, options, uint64, out);
-
- GF_OPTION_RECONF("ctr_lookupheal_link_timeout",
- priv->ctr_lookupheal_link_timeout, options, uint64, out);
-
- GF_OPTION_RECONF("record-exit", priv->ctr_record_unwind, options, bool,
- out);
-
- GF_OPTION_RECONF("record-entry", priv->ctr_record_wind, options, bool, out);
-
- /* If database is sqlite */
- if (priv->gfdb_db_type == GFDB_SQLITE3) {
- /* AUTOCHECKPOINT */
- if (dict_get_str(options, GFDB_SQL_PARAM_WAL_AUTOCHECK, &temp_str) ==
- 0) {
- ret = set_db_params(priv->_db_conn, "wal_autocheckpoint", temp_str);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0,
- CTR_MSG_SET_VALUE_TO_SQL_PARAM_FAILED,
- "Failed to set %s", GFDB_SQL_PARAM_WAL_AUTOCHECK);
- }
- }
-
- /* CACHE_SIZE */
- if (dict_get_str(options, GFDB_SQL_PARAM_CACHE_SIZE, &temp_str) == 0) {
- ret = set_db_params(priv->_db_conn, "cache_size", temp_str);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0,
- CTR_MSG_SET_VALUE_TO_SQL_PARAM_FAILED,
- "Failed to set %s", GFDB_SQL_PARAM_CACHE_SIZE);
- }
- }
- }
-
- ret = 0;
-
-out:
-
- return ret;
-}
-
-/****************************init********************************************/
-
-int32_t
-init(xlator_t *this)
-{
- gf_ctr_private_t *priv = NULL;
- int ret_db = -1;
-
- if (!this) {
- gf_msg("ctr", GF_LOG_ERROR, 0, CTR_MSG_FATAL_ERROR,
- "FATAL: ctr this is not initialized");
- return -1;
- }
-
- if (!this->children || this->children->next) {
- gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_FATAL_ERROR,
- "FATAL: ctr should have exactly one child");
- return -1;
- }
-
- if (!this->parents) {
- gf_msg(this->name, GF_LOG_WARNING, 0, CTR_MSG_DANGLING_VOLUME,
- "dangling volume. check volfile ");
- }
-
- priv = GF_CALLOC(1, sizeof(*priv), gf_ctr_mt_private_t);
- if (!priv) {
- gf_msg(this->name, GF_LOG_ERROR, ENOMEM, CTR_MSG_CALLOC_FAILED,
- "Calloc did not work!!!");
- return -1;
- }
-
- /*Default values for the translator*/
- priv->ctr_record_wind = _gf_true;
- priv->ctr_record_unwind = _gf_false;
- priv->ctr_hot_brick = _gf_false;
- priv->gfdb_db_type = GFDB_SQLITE3;
- priv->gfdb_sync_type = GFDB_DB_SYNC;
- priv->_db_conn = NULL;
- priv->ctr_lookupheal_link_timeout = CTR_DEFAULT_HARDLINK_EXP_PERIOD;
- priv->ctr_lookupheal_inode_timeout = CTR_DEFAULT_INODE_EXP_PERIOD;
-
- /*Extract ctr xlator options*/
- ret_db = extract_ctr_options(this, priv);
- if (ret_db) {
- gf_msg(this->name, GF_LOG_ERROR, 0,
- CTR_MSG_EXTRACT_CTR_XLATOR_OPTIONS_FAILED,
- "Failed extracting ctr xlator options");
- GF_FREE(priv);
- return -1;
- }
-
- if (!priv->enabled) {
- gf_msg(GFDB_DATA_STORE, GF_LOG_INFO, 0, CTR_MSG_XLATOR_DISABLED,
- "CTR Xlator is not enabled so skip ctr init");
- goto out;
- }
-
- ret_db = initialize_ctr_resource(this, priv);
- if (ret_db) {
- gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_FATAL_ERROR,
- "FATAL: Failed ctr initialize resource");
- return -1;
- }
-
-out:
- this->private = (void *)priv;
- return 0;
-}
-
-int
-notify(xlator_t *this, int event, void *data, ...)
-{
- gf_ctr_private_t *priv = NULL;
- int ret = 0;
-
- priv = this->private;
-
- if (!priv)
- goto out;
-
- ret = default_notify(this, event, data);
-
-out:
- return ret;
-}
-
-int32_t
-mem_acct_init(xlator_t *this)
-{
- int ret = -1;
-
- GF_VALIDATE_OR_GOTO("ctr", this, out);
-
- ret = xlator_mem_acct_init(this, gf_ctr_mt_end + 1);
-
- if (ret != 0) {
- gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_MEM_ACC_INIT_FAILED,
- "Memory accounting init"
- "failed");
- return ret;
- }
-out:
- return ret;
-}
-
-void
-fini(xlator_t *this)
-{
- gf_ctr_private_t *priv = NULL;
-
- priv = this->private;
-
- if (priv && priv->enabled) {
- if (fini_db(priv->_db_conn)) {
- gf_msg(this->name, GF_LOG_WARNING, 0, CTR_MSG_CLOSE_DB_CONN_FAILED,
- "Failed closing "
- "db connection");
- }
-
- if (priv->_db_conn)
- priv->_db_conn = NULL;
-
- GF_FREE(priv->ctr_db_path);
- if (pthread_mutex_destroy(&priv->compact_lock)) {
- gf_msg(this->name, GF_LOG_WARNING, 0, CTR_MSG_CLOSE_DB_CONN_FAILED,
- "Failed to "
- "destroy the compaction mutex");
- }
- }
- GF_FREE(priv);
- mem_pool_destroy(this->local_pool);
- this->local_pool = NULL;
-
- return;
-}
-
-struct xlator_fops fops = {
- /*lookup*/
- .lookup = ctr_lookup,
- /*write fops */
- .mknod = ctr_mknod,
- .create = ctr_create,
- .truncate = ctr_truncate,
- .ftruncate = ctr_ftruncate,
- .setxattr = ctr_setxattr,
- .fsetxattr = ctr_fsetxattr,
- .removexattr = ctr_removexattr,
- .fremovexattr = ctr_fremovexattr,
- .unlink = ctr_unlink,
- .link = ctr_link,
- .rename = ctr_rename,
- .writev = ctr_writev,
- .setattr = ctr_setattr,
- .fsetattr = ctr_fsetattr,
- /*read fops*/
- .readv = ctr_readv,
- /* IPC call*/
- .ipc = ctr_ipc};
-
-struct xlator_cbks cbks = {.forget = ctr_forget};
-
-struct volume_options options[] = {
- {.key =
- {
- "ctr-enabled",
- },
- .type = GF_OPTION_TYPE_BOOL,
- .value = {"on", "off"},
- .default_value = "off",
- .description = "Enables the CTR",
- .flags = OPT_FLAG_SETTABLE},
- {.key = {"record-entry"},
- .type = GF_OPTION_TYPE_BOOL,
- .value = {"on", "off"},
- .default_value = "on"},
- {.key = {"record-exit"},
- .type = GF_OPTION_TYPE_BOOL,
- .value = {"on", "off"},
- .default_value = "off"},
- {.key = {"record-counters"},
- .type = GF_OPTION_TYPE_BOOL,
- .value = {"on", "off"},
- .default_value = "off",
- .op_version = {GD_OP_VERSION_3_7_0},
- .flags = OPT_FLAG_SETTABLE,
- .tags = {}},
- {.key = {"ctr-record-metadata-heat"},
- .type = GF_OPTION_TYPE_BOOL,
- .value = {"on", "off"},
- .default_value = "off",
- .flags = OPT_FLAG_SETTABLE,
- .op_version = {GD_OP_VERSION_3_7_0},
- .tags = {}},
- {.key = {"ctr_link_consistency"},
- .type = GF_OPTION_TYPE_BOOL,
- .value = {"on", "off"},
- .default_value = "off",
- .flags = OPT_FLAG_SETTABLE,
- .op_version = {GD_OP_VERSION_3_7_0},
- .tags = {}},
- {.key = {"ctr_lookupheal_link_timeout"},
- .type = GF_OPTION_TYPE_INT,
- .default_value = "300",
- .flags = OPT_FLAG_SETTABLE,
- .op_version = {GD_OP_VERSION_3_7_2},
- .tags = {}},
- {.key = {"ctr_lookupheal_inode_timeout"},
- .type = GF_OPTION_TYPE_INT,
- .default_value = "300",
- .flags = OPT_FLAG_SETTABLE,
- .op_version = {GD_OP_VERSION_3_7_2},
- .tags = {}},
- {.key = {"hot-brick"},
- .type = GF_OPTION_TYPE_BOOL,
- .value = {"on", "off"},
- .default_value = "off"},
- {.key = {"db-type"},
- .type = GF_OPTION_TYPE_STR,
- .value = {"hashfile", "rocksdb", "changelog", "sqlite3", "hyperdex"},
- .default_value = "sqlite3",
- .op_version = {GD_OP_VERSION_3_7_0},
- .flags = OPT_FLAG_SETTABLE,
- .tags = {}},
- {.key = {"db-sync"},
- .type = GF_OPTION_TYPE_STR,
- .value = {"sync", "async"},
- .default_value = "sync"},
- {.key = {"db-path"}, .type = GF_OPTION_TYPE_PATH},
- {.key = {"db-name"}, .type = GF_OPTION_TYPE_STR},
- {.key = {GFDB_SQL_PARAM_SYNC},
- .type = GF_OPTION_TYPE_STR,
- .value = {"off", "normal", "full"},
- .default_value = "normal"},
- {.key = {GFDB_SQL_PARAM_JOURNAL_MODE},
- .type = GF_OPTION_TYPE_STR,
- .value = {"delete", "truncate", "persist", "memory", "wal", "off"},
- .default_value = "wal",
- .flags = OPT_FLAG_SETTABLE,
- .op_version = {GD_OP_VERSION_3_7_0},
- .tags = {}},
- {.key = {GFDB_SQL_PARAM_AUTO_VACUUM},
- .type = GF_OPTION_TYPE_STR,
- .value = {"off", "full", "incr"},
- .default_value = "off",
- .flags = OPT_FLAG_SETTABLE,
- .op_version = {GD_OP_VERSION_3_7_0},
- .tags = {}},
- {.key = {GFDB_SQL_PARAM_WAL_AUTOCHECK},
- .type = GF_OPTION_TYPE_INT,
- .default_value = "25000",
- .flags = OPT_FLAG_SETTABLE,
- .op_version = {GD_OP_VERSION_3_7_0},
- .tags = {}},
- {.key = {GFDB_SQL_PARAM_CACHE_SIZE},
- .type = GF_OPTION_TYPE_INT,
- .default_value = "12500",
- .flags = OPT_FLAG_SETTABLE,
- .op_version = {GD_OP_VERSION_3_7_0},
- .tags = {}},
- {.key = {GFDB_SQL_PARAM_PAGE_SIZE},
- .type = GF_OPTION_TYPE_INT,
- .default_value = "4096",
- .flags = OPT_FLAG_SETTABLE,
- .op_version = {GD_OP_VERSION_3_7_0},
- .tags = {}},
- {.key = {NULL}},
-};
diff --git a/xlators/features/changetimerecorder/src/changetimerecorder.h b/xlators/features/changetimerecorder/src/changetimerecorder.h
deleted file mode 100644
index 2a8bbd18c5b..00000000000
--- a/xlators/features/changetimerecorder/src/changetimerecorder.h
+++ /dev/null
@@ -1,21 +0,0 @@
-/*
- Copyright (c) 2006-2015 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-#ifndef __CTR_H
-#define __CTR_H
-
-#include "glusterfs.h"
-#include "xlator.h"
-#include "logging.h"
-#include "common-utils.h"
-#include "ctr_mem_types.h"
-#include "ctr-helper.h"
-
-#endif /* __CTR_H */
diff --git a/xlators/features/changetimerecorder/src/ctr-helper.c b/xlators/features/changetimerecorder/src/ctr-helper.c
deleted file mode 100644
index e1e65735cef..00000000000
--- a/xlators/features/changetimerecorder/src/ctr-helper.c
+++ /dev/null
@@ -1,293 +0,0 @@
-/*
- Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-#include "gfdb_sqlite3.h"
-#include "ctr-helper.h"
-#include "ctr-messages.h"
-
-/*******************************************************************************
- *
- * Fill unwind into db record
- *
- ******************************************************************************/
-int
-fill_db_record_for_unwind(xlator_t *this, gf_ctr_local_t *ctr_local,
- gfdb_fop_type_t fop_type, gfdb_fop_path_t fop_path)
-{
- int ret = -1;
- gfdb_time_t *ctr_uwtime = NULL;
- gf_ctr_private_t *_priv = NULL;
-
- GF_ASSERT(this);
- _priv = this->private;
- GF_ASSERT(_priv);
-
- GF_ASSERT(ctr_local);
-
- /*If not unwind path error*/
- if (!isunwindpath(fop_path)) {
- gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_WRONG_FOP_PATH,
- "Wrong fop_path. Should be unwind");
- goto out;
- }
-
- ctr_uwtime = &CTR_DB_REC(ctr_local).gfdb_unwind_change_time;
- CTR_DB_REC(ctr_local).gfdb_fop_path = fop_path;
- CTR_DB_REC(ctr_local).gfdb_fop_type = fop_type;
-
- ret = gettimeofday(ctr_uwtime, NULL);
- if (ret == -1) {
- gf_msg(this->name, GF_LOG_ERROR, errno,
- CTR_MSG_FILL_UNWIND_TIME_REC_ERROR,
- "Error "
- "filling unwind time record %s",
- strerror(errno));
- goto out;
- }
-
- /* Special case i.e if its a tier rebalance
- * + cold tier brick
- * + its a create/mknod FOP
- * we record unwind time as zero */
- if (ctr_local->client_pid == GF_CLIENT_PID_TIER_DEFRAG &&
- (!_priv->ctr_hot_brick) && isdentrycreatefop(fop_type)) {
- memset(ctr_uwtime, 0, sizeof(*ctr_uwtime));
- }
- ret = 0;
-out:
- return ret;
-}
-
-/*******************************************************************************
- *
- * Fill wind into db record
- *
- ******************************************************************************/
-int
-fill_db_record_for_wind(xlator_t *this, gf_ctr_local_t *ctr_local,
- gf_ctr_inode_context_t *ctr_inode_cx)
-{
- int ret = -1;
- gfdb_time_t *ctr_wtime = NULL;
- gf_ctr_private_t *_priv = NULL;
-
- GF_ASSERT(this);
- _priv = this->private;
- GF_ASSERT(_priv);
- GF_ASSERT(ctr_local);
- IS_CTR_INODE_CX_SANE(ctr_inode_cx);
-
- /*if not wind path error!*/
- if (!iswindpath(ctr_inode_cx->fop_path)) {
- gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_WRONG_FOP_PATH,
- "Wrong fop_path. Should be wind");
- goto out;
- }
-
- ctr_wtime = &CTR_DB_REC(ctr_local).gfdb_wind_change_time;
- CTR_DB_REC(ctr_local).gfdb_fop_path = ctr_inode_cx->fop_path;
- CTR_DB_REC(ctr_local).gfdb_fop_type = ctr_inode_cx->fop_type;
- CTR_DB_REC(ctr_local).link_consistency = _priv->ctr_link_consistency;
-
- ret = gettimeofday(ctr_wtime, NULL);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, errno,
- CTR_MSG_FILL_UNWIND_TIME_REC_ERROR,
- "Error filling wind time record %s", strerror(errno));
- goto out;
- }
-
- /* Special case i.e if its a tier rebalance
- * + cold tier brick
- * + its a create/mknod FOP
- * we record wind time as zero */
- if (ctr_local->client_pid == GF_CLIENT_PID_TIER_DEFRAG &&
- (!_priv->ctr_hot_brick) && isdentrycreatefop(ctr_inode_cx->fop_type)) {
- memset(ctr_wtime, 0, sizeof(*ctr_wtime));
- }
-
- /* Copy gfid into db record */
- gf_uuid_copy(CTR_DB_REC(ctr_local).gfid, *(ctr_inode_cx->gfid));
-
- /* Copy older gfid if any */
- if (ctr_inode_cx->old_gfid &&
- (!gf_uuid_is_null(*(ctr_inode_cx->old_gfid)))) {
- gf_uuid_copy(CTR_DB_REC(ctr_local).old_gfid, *(ctr_inode_cx->old_gfid));
- }
-
- /*Hard Links*/
- if (isdentryfop(ctr_inode_cx->fop_type)) {
- /*new link fop*/
- if (NEW_LINK_CX(ctr_inode_cx)) {
- gf_uuid_copy(CTR_DB_REC(ctr_local).pargfid,
- *((NEW_LINK_CX(ctr_inode_cx))->pargfid));
- strcpy(CTR_DB_REC(ctr_local).file_name,
- NEW_LINK_CX(ctr_inode_cx)->basename);
- }
- /*rename fop*/
- if (OLD_LINK_CX(ctr_inode_cx)) {
- gf_uuid_copy(CTR_DB_REC(ctr_local).old_pargfid,
- *((OLD_LINK_CX(ctr_inode_cx))->pargfid));
- strcpy(CTR_DB_REC(ctr_local).old_file_name,
- OLD_LINK_CX(ctr_inode_cx)->basename);
- }
- }
-
- ret = 0;
-out:
- /*On error roll back and clean the record*/
- if (ret == -1) {
- CLEAR_CTR_DB_RECORD(ctr_local);
- }
- return ret;
-}
-
-/******************************************************************************
- *
- * CTR xlator init related functions
- *
- *
- * ****************************************************************************/
-static int
-extract_sql_params(xlator_t *this, dict_t *params_dict)
-{
- int ret = -1;
- char *db_path = NULL;
- char *db_name = NULL;
- char *db_full_path = NULL;
-
- GF_ASSERT(this);
- GF_ASSERT(params_dict);
-
- /*Extract the path of the db*/
- db_path = NULL;
- GET_DB_PARAM_FROM_DICT_DEFAULT(this->name, this->options, "db-path",
- db_path, "/var/run/gluster/");
-
- /*Extract the name of the db*/
- db_name = NULL;
- GET_DB_PARAM_FROM_DICT_DEFAULT(this->name, this->options, "db-name",
- db_name, "gf_ctr_db.db");
-
- /*Construct full path of the db*/
- ret = gf_asprintf(&db_full_path, "%s/%s", db_path, db_name);
- if (ret < 0) {
- gf_msg(GFDB_DATA_STORE, GF_LOG_ERROR, 0,
- CTR_MSG_CONSTRUCT_DB_PATH_FAILED,
- "Construction of full db path failed!");
- goto out;
- }
-
- /*Setting the SQL DB Path*/
- SET_DB_PARAM_TO_DICT(this->name, params_dict, GFDB_SQL_PARAM_DBPATH,
- db_full_path, ret, out);
-
- /*Extract rest of the sql params*/
- ret = gfdb_set_sql_params(this->name, this->options, params_dict);
- if (ret) {
- gf_msg(GFDB_DATA_STORE, GF_LOG_ERROR, 0,
- CTR_MSG_SET_VALUE_TO_SQL_PARAM_FAILED,
- "Failed setting values to sql param dict!");
- }
-
- ret = 0;
-
-out:
- if (ret)
- GF_FREE(db_full_path);
- return ret;
-}
-
-int
-extract_db_params(xlator_t *this, dict_t *params_dict, gfdb_db_type_t db_type)
-{
- int ret = -1;
-
- GF_ASSERT(this);
- GF_ASSERT(params_dict);
-
- switch (db_type) {
- case GFDB_SQLITE3:
- ret = extract_sql_params(this, params_dict);
- if (ret)
- goto out;
- break;
- case GFDB_ROCKS_DB:
- case GFDB_HYPERDEX:
- case GFDB_HASH_FILE_STORE:
- case GFDB_INVALID_DB:
- case GFDB_DB_END:
- goto out;
- }
- ret = 0;
-out:
- return ret;
-}
-
-int
-extract_ctr_options(xlator_t *this, gf_ctr_private_t *_priv)
-{
- int ret = -1;
- char *_val_str = NULL;
-
- GF_ASSERT(this);
- GF_ASSERT(_priv);
-
- /*Checking if the CTR Translator is enabled. By default its disabled*/
- _priv->enabled = _gf_false;
- GF_OPTION_INIT("ctr-enabled", _priv->enabled, bool, out);
- if (!_priv->enabled) {
- gf_msg(GFDB_DATA_STORE, GF_LOG_INFO, 0, CTR_MSG_XLATOR_DISABLED,
- "CTR Xlator is disabled.");
- ret = 0;
- goto out;
- }
-
- /*Extract db type*/
- GF_OPTION_INIT("db-type", _val_str, str, out);
- _priv->gfdb_db_type = gf_string2gfdbdbtype(_val_str);
-
- /*Extract flag for record on wind*/
- GF_OPTION_INIT("record-entry", _priv->ctr_record_wind, bool, out);
-
- /*Extract flag for record on unwind*/
- GF_OPTION_INIT("record-exit", _priv->ctr_record_unwind, bool, out);
-
- /*Extract flag for record on counters*/
- GF_OPTION_INIT("record-counters", _priv->ctr_record_counter, bool, out);
-
- /* Extract flag for record metadata heat */
- GF_OPTION_INIT("ctr-record-metadata-heat", _priv->ctr_record_metadata_heat,
- bool, out);
-
- /*Extract flag for link consistency*/
- GF_OPTION_INIT("ctr_link_consistency", _priv->ctr_link_consistency, bool,
- out);
-
- /*Extract ctr_lookupheal_inode_timeout */
- GF_OPTION_INIT("ctr_lookupheal_inode_timeout",
- _priv->ctr_lookupheal_inode_timeout, uint64, out);
-
- /*Extract ctr_lookupheal_link_timeout*/
- GF_OPTION_INIT("ctr_lookupheal_link_timeout",
- _priv->ctr_lookupheal_link_timeout, uint64, out);
-
- /*Extract flag for hot tier brick*/
- GF_OPTION_INIT("hot-brick", _priv->ctr_hot_brick, bool, out);
-
- /*Extract flag for sync mode*/
- GF_OPTION_INIT("db-sync", _val_str, str, out);
- _priv->gfdb_sync_type = gf_string2gfdbdbsync(_val_str);
-
- ret = 0;
-
-out:
- return ret;
-}
diff --git a/xlators/features/changetimerecorder/src/ctr-helper.h b/xlators/features/changetimerecorder/src/ctr-helper.h
deleted file mode 100644
index 3268c9d2fb9..00000000000
--- a/xlators/features/changetimerecorder/src/ctr-helper.h
+++ /dev/null
@@ -1,854 +0,0 @@
-/*
- Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-#ifndef __CTR_HELPER_H
-#define __CTR_HELPER_H
-
-#include "xlator.h"
-#include "ctr_mem_types.h"
-#include "iatt.h"
-#include "glusterfs.h"
-#include "xlator.h"
-#include "defaults.h"
-#include "logging.h"
-#include "common-utils.h"
-#include <time.h>
-#include <sys/time.h>
-#include <pthread.h>
-
-#include "gfdb_data_store.h"
-#include "ctr-xlator-ctx.h"
-#include "ctr-messages.h"
-
-#define CTR_DEFAULT_HARDLINK_EXP_PERIOD 300 /* Five mins */
-#define CTR_DEFAULT_INODE_EXP_PERIOD 300 /* Five mins */
-
-typedef struct ctr_query_cbk_args {
- int query_fd;
- int count;
-} ctr_query_cbk_args_t;
-
-/*CTR Xlator Private structure*/
-typedef struct gf_ctr_private {
- gf_boolean_t enabled;
- char *ctr_db_path;
- gf_boolean_t ctr_hot_brick;
- gf_boolean_t ctr_record_wind;
- gf_boolean_t ctr_record_unwind;
- gf_boolean_t ctr_record_counter;
- gf_boolean_t ctr_record_metadata_heat;
- gf_boolean_t ctr_link_consistency;
- gfdb_db_type_t gfdb_db_type;
- gfdb_sync_type_t gfdb_sync_type;
- gfdb_conn_node_t *_db_conn;
- uint64_t ctr_lookupheal_link_timeout;
- uint64_t ctr_lookupheal_inode_timeout;
- gf_boolean_t compact_active;
- gf_boolean_t compact_mode_switched;
- pthread_mutex_t compact_lock;
-} gf_ctr_private_t;
-
-/*
- * gf_ctr_local_t is the ctr xlator local data structure that is stored in
- * the call_frame of each FOP.
- *
- * gfdb_db_record: The gf_ctr_local contains a gfdb_db_record object, which is
- * used by the insert_record() api from the libgfdb. The gfdb_db_record object
- * will contain all the inode and hardlink(only for dentry fops: create,
- * mknod,link, unlink, rename).The ctr_local is keep alive till the unwind
- * call and will be release during the unwind. The same gfdb_db_record will
- * used for the unwind insert_record() api, to record unwind in the database.
- *
- * ia_inode_type in gf_ctr_local will tell the type of the inode. This is
- * important for during the unwind path. As we will not have the inode during
- * the unwind path. We would have include this in the gfdb_db_record itself
- * but currently we record only file inode information.
- *
- * is_internal_fop in gf_ctr_local will tell us if this is a internal fop and
- * take special/no action. We don't record change/access times or increement
- * heat counter for internal fops from rebalancer.
- * */
-typedef struct gf_ctr_local {
- gfdb_db_record_t gfdb_db_record;
- ia_type_t ia_inode_type;
- gf_boolean_t is_internal_fop;
- gf_special_pid_t client_pid;
-} gf_ctr_local_t;
-/*
- * Easy access of gfdb_db_record of ctr_local
- * */
-#define CTR_DB_REC(ctr_local) (ctr_local->gfdb_db_record)
-
-/*Clear db record*/
-#define CLEAR_CTR_DB_RECORD(ctr_local) \
- do { \
- ctr_local->gfdb_db_record.gfdb_fop_path = GFDB_FOP_INVALID; \
- memset(&(ctr_local->gfdb_db_record.gfdb_wind_change_time), 0, \
- sizeof(gfdb_time_t)); \
- memset(&(ctr_local->gfdb_db_record.gfdb_unwind_change_time), 0, \
- sizeof(gfdb_time_t)); \
- gf_uuid_clear(ctr_local->gfdb_db_record.gfid); \
- gf_uuid_clear(ctr_local->gfdb_db_record.pargfid); \
- memset(ctr_local->gfdb_db_record.file_name, 0, GF_NAME_MAX + 1); \
- memset(ctr_local->gfdb_db_record.old_file_name, 0, GF_NAME_MAX + 1); \
- ctr_local->gfdb_db_record.gfdb_fop_type = GFDB_FOP_INVALID_OP; \
- ctr_local->ia_inode_type = IA_INVAL; \
- } while (0)
-
-static gf_ctr_local_t *
-init_ctr_local_t(xlator_t *this)
-{
- gf_ctr_local_t *ctr_local = NULL;
-
- GF_ASSERT(this);
-
- ctr_local = mem_get0(this->local_pool);
- if (!ctr_local) {
- gf_msg(GFDB_DATA_STORE, GF_LOG_ERROR, 0,
- CTR_MSG_CREATE_CTR_LOCAL_ERROR_WIND,
- "Error while creating ctr local");
- goto out;
- }
-
- CLEAR_CTR_DB_RECORD(ctr_local);
-out:
- return ctr_local;
-}
-
-static void
-free_ctr_local(gf_ctr_local_t *ctr_local)
-{
- if (ctr_local)
- mem_put(ctr_local);
-}
-
-/******************************************************************************
- *
- *
- * Context Carrier Structures
- *
- *
- * ****************************************************************************/
-
-/*
- * Context Carrier structures are used to carry relevant information about
- * inodes and links from the fops calls to the ctr_insert_wind.
- * These structure just have pointers to the original data and donot
- * do a deep copy of any data. This info is deep copied to
- * ctr_local->gfdb_db_record and passed to insert_record() api of libgfdb. This
- * info remains persistent for the unwind in ctr_local->gfdb_db_record
- * and once used will be destroyed.
- *
- * gf_ctr_link_context_t : Context structure for hard links
- * gf_ctr_inode_context_t : Context structure for inodes
- *
- * */
-
-/*Context Carrier Structure for hard links*/
-typedef struct gf_ctr_link_context {
- uuid_t *pargfid;
- const char *basename;
-} gf_ctr_link_context_t;
-
-/*Context Carrier Structure for inodes*/
-typedef struct gf_ctr_inode_context {
- ia_type_t ia_type;
- uuid_t *gfid;
- uuid_t *old_gfid;
- gf_ctr_link_context_t *new_link_cx;
- gf_ctr_link_context_t *old_link_cx;
- gfdb_fop_type_t fop_type;
- gfdb_fop_path_t fop_path;
- gf_boolean_t is_internal_fop;
- /* Indicating metadata fops */
- gf_boolean_t is_metadata_fop;
-} gf_ctr_inode_context_t;
-
-/*******************Util Macros for Context Carrier Structures*****************/
-
-/*Checks if ctr_link_cx is sane!*/
-#define IS_CTR_LINK_CX_SANE(ctr_link_cx) \
- do { \
- if (ctr_link_cx) { \
- if (ctr_link_cx->pargfid) \
- GF_ASSERT(*(ctr_link_cx->pargfid)); \
- GF_ASSERT(ctr_link_cx->basename); \
- }; \
- } while (0)
-
-/*Clear and fill the ctr_link_context with values*/
-#define FILL_CTR_LINK_CX(ctr_link_cx, _pargfid, _basename, label) \
- do { \
- GF_VALIDATE_OR_GOTO("ctr", ctr_link_cx, label); \
- GF_VALIDATE_OR_GOTO("ctr", _pargfid, label); \
- GF_VALIDATE_OR_GOTO("ctr", _basename, label); \
- memset(ctr_link_cx, 0, sizeof(*ctr_link_cx)); \
- ctr_link_cx->pargfid = &_pargfid; \
- ctr_link_cx->basename = _basename; \
- } while (0)
-
-#define NEW_LINK_CX(ctr_inode_cx) ctr_inode_cx->new_link_cx
-
-#define OLD_LINK_CX(ctr_inode_cx) ctr_inode_cx->old_link_cx
-
-/*Checks if ctr_inode_cx is sane!*/
-#define IS_CTR_INODE_CX_SANE(ctr_inode_cx) \
- do { \
- GF_ASSERT(ctr_inode_cx); \
- GF_ASSERT(ctr_inode_cx->gfid); \
- GF_ASSERT(*(ctr_inode_cx->gfid)); \
- GF_ASSERT(ctr_inode_cx->fop_type != GFDB_FOP_INVALID_OP); \
- GF_ASSERT(ctr_inode_cx->fop_path != GFDB_FOP_INVALID); \
- IS_CTR_LINK_CX_SANE(NEW_LINK_CX(ctr_inode_cx)); \
- IS_CTR_LINK_CX_SANE(OLD_LINK_CX(ctr_inode_cx)); \
- } while (0)
-
-/*Clear and fill the ctr_inode_context with values*/
-#define FILL_CTR_INODE_CONTEXT(ctr_inode_cx, _ia_type, _gfid, _new_link_cx, \
- _old_link_cx, _fop_type, _fop_path) \
- do { \
- GF_ASSERT(ctr_inode_cx); \
- GF_ASSERT(_gfid); \
- GF_ASSERT(_fop_type != GFDB_FOP_INVALID_OP); \
- GF_ASSERT(_fop_path != GFDB_FOP_INVALID); \
- memset(ctr_inode_cx, 0, sizeof(*ctr_inode_cx)); \
- ctr_inode_cx->ia_type = _ia_type; \
- ctr_inode_cx->gfid = &_gfid; \
- IS_CTR_LINK_CX_SANE(NEW_LINK_CX(ctr_inode_cx)); \
- if (_new_link_cx) \
- NEW_LINK_CX(ctr_inode_cx) = _new_link_cx; \
- IS_CTR_LINK_CX_SANE(OLD_LINK_CX(ctr_inode_cx)); \
- if (_old_link_cx) \
- OLD_LINK_CX(ctr_inode_cx) = _old_link_cx; \
- ctr_inode_cx->fop_type = _fop_type; \
- ctr_inode_cx->fop_path = _fop_path; \
- } while (0)
-
-/******************************************************************************
- *
- * Util functions or macros used by
- * insert wind and insert unwind
- *
- * ****************************************************************************/
-/* Free ctr frame local */
-static inline void
-ctr_free_frame_local(call_frame_t *frame)
-{
- if (frame) {
- free_ctr_local((gf_ctr_local_t *)frame->local);
- frame->local = NULL;
- }
-}
-
-/* Setting GF_REQUEST_LINK_COUNT_XDATA in dict
- * that has to be sent to POSIX Xlator to send
- * link count in unwind path.
- * return 0 for success with not creation of dict
- * return 1 for success with creation of dict
- * return -1 for failure.
- * */
-static inline int
-set_posix_link_request(xlator_t *this, dict_t **xdata)
-{
- int ret = -1;
- gf_boolean_t is_created = _gf_false;
-
- GF_VALIDATE_OR_GOTO("ctr", this, out);
- GF_VALIDATE_OR_GOTO(this->name, xdata, out);
-
- /*create xdata if NULL*/
- if (!*xdata) {
- *xdata = dict_new();
- is_created = _gf_true;
- ret = 1;
- } else {
- ret = 0;
- }
-
- if (!*xdata) {
- gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_XDATA_NULL,
- "xdata is NULL :Cannot send "
- "GF_REQUEST_LINK_COUNT_XDATA to posix");
- ret = -1;
- goto out;
- }
-
- ret = dict_set_int32(*xdata, GF_REQUEST_LINK_COUNT_XDATA, 1);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0,
- CTR_MSG_SET_CTR_RESPONSE_LINK_COUNT_XDATA_FAILED,
- "Failed setting GF_REQUEST_LINK_COUNT_XDATA");
- ret = -1;
- goto out;
- }
- ret = 0;
-out:
- if (ret == -1) {
- if (*xdata && is_created) {
- dict_unref(*xdata);
- }
- }
- return ret;
-}
-
-/*
- * If a bitrot fop
- * */
-#define BITROT_FOP(frame) \
- (frame->root->pid == GF_CLIENT_PID_BITD || \
- frame->root->pid == GF_CLIENT_PID_SCRUB)
-
-/*
- * If a rebalancer fop
- * */
-#define REBALANCE_FOP(frame) (frame->root->pid == GF_CLIENT_PID_DEFRAG)
-
-/*
- * If its a tiering rebalancer fop
- * */
-#define TIER_REBALANCE_FOP(frame) \
- (frame->root->pid == GF_CLIENT_PID_TIER_DEFRAG)
-
-/*
- * If its a AFR SELF HEAL
- * */
-#define AFR_SELF_HEAL_FOP(frame) (frame->root->pid == GF_CLIENT_PID_SELF_HEALD)
-
-/*
- * if a rebalancer fop goto
- * */
-#define CTR_IF_REBALANCE_FOP_THEN_GOTO(frame, label) \
- do { \
- if (REBALANCE_FOP(frame)) \
- goto label; \
- } while (0)
-
-/*
- * Internal fop
- *
- * */
-static inline gf_boolean_t
-is_internal_fop(call_frame_t *frame, dict_t *xdata)
-{
- gf_boolean_t ret = _gf_false;
-
- GF_ASSERT(frame);
- GF_ASSERT(frame->root);
-
- if (AFR_SELF_HEAL_FOP(frame)) {
- ret = _gf_true;
- }
- if (BITROT_FOP(frame)) {
- ret = _gf_true;
- }
- if (REBALANCE_FOP(frame) || TIER_REBALANCE_FOP(frame)) {
- ret = _gf_true;
- if (xdata && dict_get(xdata, CTR_ATTACH_TIER_LOOKUP)) {
- ret = _gf_false;
- }
- }
- if (xdata && dict_get(xdata, GLUSTERFS_INTERNAL_FOP_KEY)) {
- ret = _gf_true;
- }
-
- return ret;
-}
-
-#define CTR_IF_INTERNAL_FOP_THEN_GOTO(frame, dict, label) \
- do { \
- if (is_internal_fop(frame, dict)) \
- goto label; \
- } while (0)
-
-/* if fop has failed exit */
-#define CTR_IF_FOP_FAILED_THEN_GOTO(this, op_ret, op_errno, label) \
- do { \
- if (op_ret == -1) { \
- gf_msg_trace(this->name, 0, "Failed fop with %s", \
- strerror(op_errno)); \
- goto label; \
- }; \
- } while (0)
-
-/*
- * IS CTR Xlator is disabled then goto to label
- * */
-#define CTR_IS_DISABLED_THEN_GOTO(this, label) \
- do { \
- gf_ctr_private_t *_priv = NULL; \
- GF_ASSERT(this); \
- GF_ASSERT(this->private); \
- _priv = this->private; \
- if (!_priv->_db_conn) \
- goto label; \
- } while (0)
-
-/*
- * IS CTR record metadata heat is disabled then goto to label
- * */
-#define CTR_RECORD_METADATA_HEAT_IS_DISABLED_THEN_GOTO(this, label) \
- do { \
- gf_ctr_private_t *_priv = NULL; \
- GF_ASSERT(this); \
- GF_ASSERT(this->private); \
- _priv = this->private; \
- if (!_priv->ctr_record_metadata_heat) \
- goto label; \
- } while (0)
-
-int
-fill_db_record_for_unwind(xlator_t *this, gf_ctr_local_t *ctr_local,
- gfdb_fop_type_t fop_type, gfdb_fop_path_t fop_path);
-
-int
-fill_db_record_for_wind(xlator_t *this, gf_ctr_local_t *ctr_local,
- gf_ctr_inode_context_t *ctr_inode_cx);
-
-/*******************************************************************************
- * CTR INSERT WIND
- * *****************************************************************************
- * Function used to insert/update record into the database during a wind fop
- * This function creates ctr_local structure into the frame of the fop
- * call.
- * ****************************************************************************/
-
-static inline int
-ctr_insert_wind(call_frame_t *frame, xlator_t *this,
- gf_ctr_inode_context_t *ctr_inode_cx)
-{
- int ret = -1;
- gf_ctr_private_t *_priv = NULL;
- gf_ctr_local_t *ctr_local = NULL;
-
- GF_ASSERT(frame);
- GF_ASSERT(frame->root);
- GF_ASSERT(this);
- IS_CTR_INODE_CX_SANE(ctr_inode_cx);
-
- _priv = this->private;
- GF_ASSERT(_priv);
-
- GF_ASSERT(_priv->_db_conn);
-
- /*If record_wind option of CTR is on record wind for
- * regular files only*/
- if (_priv->ctr_record_wind && ctr_inode_cx->ia_type != IA_IFDIR) {
- frame->local = init_ctr_local_t(this);
- if (!frame->local) {
- gf_msg(this->name, GF_LOG_ERROR, 0,
- CTR_MSG_CREATE_CTR_LOCAL_ERROR_WIND,
- "WIND: Error while creating ctr local");
- goto out;
- };
- ctr_local = frame->local;
- ctr_local->client_pid = frame->root->pid;
- ctr_local->is_internal_fop = ctr_inode_cx->is_internal_fop;
-
- /* Decide whether to record counters or not */
- CTR_DB_REC(ctr_local).do_record_counters = _gf_false;
- /* If record counter is enabled */
- if (_priv->ctr_record_counter) {
- /* If not a internal fop */
- if (!(ctr_local->is_internal_fop)) {
- /* If its a metadata fop AND
- * record metadata heat
- * OR
- * its NOT a metadata fop */
- if ((ctr_inode_cx->is_metadata_fop &&
- _priv->ctr_record_metadata_heat) ||
- (!ctr_inode_cx->is_metadata_fop)) {
- CTR_DB_REC(ctr_local).do_record_counters = _gf_true;
- }
- }
- }
-
- /* Decide whether to record times or not
- * For non internal FOPS record times as usual*/
- CTR_DB_REC(ctr_local).do_record_times = _gf_false;
- if (!ctr_local->is_internal_fop) {
- /* If its a metadata fop AND
- * record metadata heat
- * OR
- * its NOT a metadata fop */
- if ((ctr_inode_cx->is_metadata_fop &&
- _priv->ctr_record_metadata_heat) ||
- (!ctr_inode_cx->is_metadata_fop)) {
- CTR_DB_REC(ctr_local).do_record_times =
- (_priv->ctr_record_wind || _priv->ctr_record_unwind);
- }
- }
- /* when its a internal FOPS*/
- else {
- /* Record times only for create
- * i.e when the inode is created */
- CTR_DB_REC(ctr_local).do_record_times = (isdentrycreatefop(
- ctr_inode_cx->fop_type))
- ? _gf_true
- : _gf_false;
- }
-
- /*Fill the db record for insertion*/
- ret = fill_db_record_for_wind(this, ctr_local, ctr_inode_cx);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0,
- CTR_MSG_FILL_CTR_LOCAL_ERROR_WIND,
- "WIND: Error filling ctr local");
- goto out;
- }
-
- /*Insert the db record*/
- ret = insert_record(_priv->_db_conn, &ctr_local->gfdb_db_record);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0,
- CTR_MSG_INSERT_RECORD_WIND_FAILED,
- "WIND: Inserting of record failed!");
- goto out;
- }
- }
- ret = 0;
-out:
-
- if (ret) {
- free_ctr_local(ctr_local);
- frame->local = NULL;
- }
-
- return ret;
-}
-
-/*******************************************************************************
- * CTR INSERT UNWIND
- * *****************************************************************************
- * Function used to insert/update record into the database during a unwind fop
- * This function destroys ctr_local structure into the frame of the fop
- * call at the end.
- * ****************************************************************************/
-static inline int
-ctr_insert_unwind(call_frame_t *frame, xlator_t *this, gfdb_fop_type_t fop_type,
- gfdb_fop_path_t fop_path)
-{
- int ret = -1;
- gf_ctr_private_t *_priv = NULL;
- gf_ctr_local_t *ctr_local = NULL;
-
- GF_ASSERT(frame);
- GF_ASSERT(this);
-
- _priv = this->private;
- GF_ASSERT(_priv);
-
- GF_ASSERT(_priv->_db_conn);
-
- ctr_local = frame->local;
-
- if (ctr_local && (_priv->ctr_record_unwind || isdentryfop(fop_type)) &&
- (ctr_local->ia_inode_type != IA_IFDIR)) {
- CTR_DB_REC(ctr_local).do_record_uwind_time = _priv->ctr_record_unwind;
-
- ret = fill_db_record_for_unwind(this, ctr_local, fop_type, fop_path);
- if (ret == -1) {
- gf_msg(this->name, GF_LOG_ERROR, 0,
- CTR_MSG_FILL_CTR_LOCAL_ERROR_UNWIND,
- "UNWIND: Error filling ctr local");
- goto out;
- }
-
- ret = insert_record(_priv->_db_conn, &ctr_local->gfdb_db_record);
- if (ret == -1) {
- gf_msg(this->name, GF_LOG_ERROR, 0,
- CTR_MSG_FILL_CTR_LOCAL_ERROR_UNWIND,
- "UNWIND: Error filling ctr local");
- goto out;
- }
- }
- ret = 0;
-out:
- return ret;
-}
-
-/******************************************************************************
- * Delete file/flink record/s from db
- * ****************************************************************************/
-static inline int
-ctr_delete_hard_link_from_db(xlator_t *this, uuid_t gfid, uuid_t pargfid,
- char *basename, gfdb_fop_type_t fop_type,
- gfdb_fop_path_t fop_path)
-{
- int ret = -1;
- gfdb_db_record_t gfdb_db_record;
- gf_ctr_private_t *_priv = NULL;
-
- _priv = this->private;
- GF_VALIDATE_OR_GOTO(this->name, _priv, out);
- GF_VALIDATE_OR_GOTO(this->name, (!gf_uuid_is_null(gfid)), out);
- GF_VALIDATE_OR_GOTO(this->name, (!gf_uuid_is_null(pargfid)), out);
- GF_VALIDATE_OR_GOTO(this->name, (fop_type == GFDB_FOP_DENTRY_WRITE), out);
- GF_VALIDATE_OR_GOTO(
- this->name, (fop_path == GFDB_FOP_UNDEL || GFDB_FOP_UNDEL_ALL), out);
-
- /* Set gfdb_db_record to 0 */
- memset(&gfdb_db_record, 0, sizeof(gfdb_db_record));
-
- /* Copy basename */
- if (snprintf(gfdb_db_record.file_name, GF_NAME_MAX, "%s", basename) >=
- GF_NAME_MAX)
- goto out;
-
- /* Copy gfid into db record */
- gf_uuid_copy(gfdb_db_record.gfid, gfid);
-
- /* Copy pargid into db record */
- gf_uuid_copy(gfdb_db_record.pargfid, pargfid);
-
- gfdb_db_record.gfdb_fop_path = fop_path;
- gfdb_db_record.gfdb_fop_type = fop_type;
-
- /*send delete request to db*/
- ret = insert_record(_priv->_db_conn, &gfdb_db_record);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_INSERT_RECORD_WIND_FAILED,
- "Failed to delete record. %s", basename);
- goto out;
- }
-
- ret = 0;
-out:
- return ret;
-}
-
-/******************************* Hard link function ***************************/
-
-static inline gf_boolean_t
-__is_inode_expired(ctr_xlator_ctx_t *ctr_xlator_ctx, gf_ctr_private_t *_priv,
- gfdb_time_t *current_time)
-{
- gf_boolean_t ret = _gf_false;
- uint64_t time_diff = 0;
-
- GF_ASSERT(ctr_xlator_ctx);
- GF_ASSERT(_priv);
- GF_ASSERT(current_time);
-
- time_diff = current_time->tv_sec - ctr_xlator_ctx->inode_heal_period;
-
- ret = (time_diff >= _priv->ctr_lookupheal_inode_timeout) ? _gf_true
- : _gf_false;
- return ret;
-}
-
-static inline gf_boolean_t
-__is_hardlink_expired(ctr_hard_link_t *ctr_hard_link, gf_ctr_private_t *_priv,
- gfdb_time_t *current_time)
-{
- gf_boolean_t ret = _gf_false;
- uint64_t time_diff = 0;
-
- GF_ASSERT(ctr_hard_link);
- GF_ASSERT(_priv);
- GF_ASSERT(current_time);
-
- time_diff = current_time->tv_sec - ctr_hard_link->hardlink_heal_period;
-
- ret = ret || (time_diff >= _priv->ctr_lookupheal_link_timeout) ? _gf_true
- : _gf_false;
-
- return ret;
-}
-
-/* Return values of heal*/
-typedef enum ctr_heal_ret_val {
- CTR_CTX_ERROR = -1,
- /* No healing required */
- CTR_TRY_NO_HEAL = 0,
- /* Try healing hard link */
- CTR_TRY_HARDLINK_HEAL = 1,
- /* Try healing inode */
- CTR_TRY_INODE_HEAL = 2,
-} ctr_heal_ret_val_t;
-
-/**
- * @brief Function to add hard link to the inode context variable.
- * The inode context maintainences a in-memory list. This is used
- * smart healing of database.
- * @param frame of the FOP
- * @param this is the Xlator instant
- * @param inode
- * @return Return ctr_heal_ret_val_t
- */
-
-static inline ctr_heal_ret_val_t
-add_hard_link_ctx(call_frame_t *frame, xlator_t *this, inode_t *inode)
-{
- ctr_heal_ret_val_t ret_val = CTR_TRY_NO_HEAL;
- int ret = -1;
- gf_ctr_local_t *ctr_local = NULL;
- ctr_xlator_ctx_t *ctr_xlator_ctx = NULL;
- ctr_hard_link_t *ctr_hard_link = NULL;
- gf_ctr_private_t *_priv = NULL;
- gfdb_time_t current_time = {0};
-
- GF_ASSERT(frame);
- GF_ASSERT(this);
- GF_ASSERT(inode);
- GF_ASSERT(this->private);
-
- _priv = this->private;
-
- ctr_local = frame->local;
- if (!ctr_local) {
- goto out;
- }
-
- ctr_xlator_ctx = init_ctr_xlator_ctx(this, inode);
- if (!ctr_xlator_ctx) {
- gf_msg(this->name, GF_LOG_ERROR, 0,
- CTR_MSG_ACCESS_CTR_INODE_CONTEXT_FAILED,
- "Failed accessing ctr inode context");
- goto out;
- }
-
- LOCK(&ctr_xlator_ctx->lock);
-
- /* Check if the hard link already exists
- * in the ctr inode context*/
- ctr_hard_link = ctr_search_hard_link_ctx(this, ctr_xlator_ctx,
- CTR_DB_REC(ctr_local).pargfid,
- CTR_DB_REC(ctr_local).file_name);
- /* if there then ignore */
- if (ctr_hard_link) {
- ret = gettimeofday(&current_time, NULL);
- if (ret == -1) {
- gf_log(this->name, GF_LOG_ERROR, "Failed to get current time");
- ret_val = CTR_CTX_ERROR;
- goto unlock;
- }
-
- if (__is_hardlink_expired(ctr_hard_link, _priv, &current_time)) {
- ctr_hard_link->hardlink_heal_period = current_time.tv_sec;
- ret_val = ret_val | CTR_TRY_HARDLINK_HEAL;
- }
-
- if (__is_inode_expired(ctr_xlator_ctx, _priv, &current_time)) {
- ctr_xlator_ctx->inode_heal_period = current_time.tv_sec;
- ret_val = ret_val | CTR_TRY_INODE_HEAL;
- }
-
- goto unlock;
- }
-
- /* Add the hard link to the list*/
- ret = ctr_add_hard_link(this, ctr_xlator_ctx, CTR_DB_REC(ctr_local).pargfid,
- CTR_DB_REC(ctr_local).file_name);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0,
- CTR_MSG_ADD_HARDLINK_TO_CTR_INODE_CONTEXT_FAILED,
- "Failed to add hardlink to the ctr inode context");
- ret_val = CTR_CTX_ERROR;
- goto unlock;
- }
-
- ret_val = CTR_TRY_NO_HEAL;
-unlock:
- UNLOCK(&ctr_xlator_ctx->lock);
-out:
- return ret_val;
-}
-
-static inline int
-delete_hard_link_ctx(call_frame_t *frame, xlator_t *this, inode_t *inode)
-{
- int ret = -1;
- ctr_xlator_ctx_t *ctr_xlator_ctx = NULL;
- gf_ctr_local_t *ctr_local = NULL;
-
- GF_ASSERT(frame);
- GF_ASSERT(this);
- GF_ASSERT(inode);
-
- ctr_local = frame->local;
- if (!ctr_local) {
- goto out;
- }
-
- ctr_xlator_ctx = get_ctr_xlator_ctx(this, inode);
- if (!ctr_xlator_ctx) {
- /* Since there is no ctr inode context so nothing more to do */
- ret = 0;
- goto out;
- }
-
- ret = ctr_delete_hard_link(this, ctr_xlator_ctx,
- CTR_DB_REC(ctr_local).pargfid,
- CTR_DB_REC(ctr_local).file_name);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_DELETE_HARDLINK_FAILED,
- "Failed to delete hard link");
- goto out;
- }
-
- ret = 0;
-
-out:
- return ret;
-}
-
-static inline int
-update_hard_link_ctx(call_frame_t *frame, xlator_t *this, inode_t *inode)
-{
- int ret = -1;
- ctr_xlator_ctx_t *ctr_xlator_ctx = NULL;
- gf_ctr_local_t *ctr_local = NULL;
-
- GF_ASSERT(frame);
- GF_ASSERT(this);
- GF_ASSERT(inode);
-
- ctr_local = frame->local;
- if (!ctr_local) {
- goto out;
- }
-
- ctr_xlator_ctx = init_ctr_xlator_ctx(this, inode);
- if (!ctr_xlator_ctx) {
- gf_msg(this->name, GF_LOG_ERROR, 0,
- CTR_MSG_ACCESS_CTR_INODE_CONTEXT_FAILED,
- "Failed accessing ctr inode context");
- goto out;
- }
-
- ret = ctr_update_hard_link(
- this, ctr_xlator_ctx, CTR_DB_REC(ctr_local).pargfid,
- CTR_DB_REC(ctr_local).file_name, CTR_DB_REC(ctr_local).old_pargfid,
- CTR_DB_REC(ctr_local).old_file_name);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_DELETE_HARDLINK_FAILED,
- "Failed to delete hard link");
- goto out;
- }
-
- ret = 0;
-
-out:
- return ret;
-}
-
-/******************************************************************************
- *
- * CTR xlator init related functions
- *
- *
- * ****************************************************************************/
-int
-extract_db_params(xlator_t *this, dict_t *params_dict, gfdb_db_type_t db_type);
-
-int
-extract_ctr_options(xlator_t *this, gf_ctr_private_t *_priv);
-
-#endif
diff --git a/xlators/features/changetimerecorder/src/ctr-messages.h b/xlators/features/changetimerecorder/src/ctr-messages.h
deleted file mode 100644
index 105d2265430..00000000000
--- a/xlators/features/changetimerecorder/src/ctr-messages.h
+++ /dev/null
@@ -1,61 +0,0 @@
-/*
- Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
- */
-
-#ifndef _CTR_MESSAGES_H_
-#define _CTR_MESSAGES_H_
-
-#include "glfs-message-id.h"
-
-/* To add new message IDs, append new identifiers at the end of the list.
- *
- * Never remove a message ID. If it's not used anymore, you can rename it or
- * leave it as it is, but not delete it. This is to prevent reutilization of
- * IDs by other messages.
- *
- * The component name must match one of the entries defined in
- * glfs-message-id.h.
- */
-
-GLFS_MSGID(
- CTR, CTR_MSG_CREATE_CTR_LOCAL_ERROR_WIND,
- CTR_MSG_FILL_CTR_LOCAL_ERROR_UNWIND, CTR_MSG_FILL_CTR_LOCAL_ERROR_WIND,
- CTR_MSG_INSERT_LINK_WIND_FAILED, CTR_MSG_INSERT_WRITEV_WIND_FAILED,
- CTR_MSG_INSERT_WRITEV_UNWIND_FAILED, CTR_MSG_INSERT_SETATTR_WIND_FAILED,
- CTR_MSG_INSERT_SETATTR_UNWIND_FAILED,
- CTR_MSG_INSERT_FREMOVEXATTR_UNWIND_FAILED,
- CTR_MSG_INSERT_FREMOVEXATTR_WIND_FAILED,
- CTR_MSG_INSERT_REMOVEXATTR_WIND_FAILED,
- CTR_MSG_INSERT_REMOVEXATTR_UNWIND_FAILED,
- CTR_MSG_INSERT_TRUNCATE_WIND_FAILED, CTR_MSG_INSERT_TRUNCATE_UNWIND_FAILED,
- CTR_MSG_INSERT_FTRUNCATE_UNWIND_FAILED,
- CTR_MSG_INSERT_FTRUNCATE_WIND_FAILED, CTR_MSG_INSERT_RENAME_WIND_FAILED,
- CTR_MSG_INSERT_RENAME_UNWIND_FAILED,
- CTR_MSG_ACCESS_CTR_INODE_CONTEXT_FAILED, CTR_MSG_ADD_HARDLINK_FAILED,
- CTR_MSG_DELETE_HARDLINK_FAILED, CTR_MSG_UPDATE_HARDLINK_FAILED,
- CTR_MSG_GET_CTR_RESPONSE_LINK_COUNT_XDATA_FAILED,
- CTR_MSG_SET_CTR_RESPONSE_LINK_COUNT_XDATA_FAILED,
- CTR_MSG_INSERT_UNLINK_UNWIND_FAILED, CTR_MSG_INSERT_UNLINK_WIND_FAILED,
- CTR_MSG_XDATA_NULL, CTR_MSG_INSERT_FSYNC_WIND_FAILED,
- CTR_MSG_INSERT_FSYNC_UNWIND_FAILED, CTR_MSG_INSERT_MKNOD_UNWIND_FAILED,
- CTR_MSG_INSERT_MKNOD_WIND_FAILED, CTR_MSG_INSERT_CREATE_WIND_FAILED,
- CTR_MSG_INSERT_CREATE_UNWIND_FAILED, CTR_MSG_INSERT_RECORD_WIND_FAILED,
- CTR_MSG_INSERT_READV_WIND_FAILED, CTR_MSG_GET_GFID_FROM_DICT_FAILED,
- CTR_MSG_SET, CTR_MSG_FATAL_ERROR, CTR_MSG_DANGLING_VOLUME,
- CTR_MSG_CALLOC_FAILED, CTR_MSG_EXTRACT_CTR_XLATOR_OPTIONS_FAILED,
- CTR_MSG_INIT_DB_PARAMS_FAILED, CTR_MSG_CREATE_LOCAL_MEMORY_POOL_FAILED,
- CTR_MSG_MEM_ACC_INIT_FAILED, CTR_MSG_CLOSE_DB_CONN_FAILED,
- CTR_MSG_FILL_UNWIND_TIME_REC_ERROR, CTR_MSG_WRONG_FOP_PATH,
- CTR_MSG_CONSTRUCT_DB_PATH_FAILED, CTR_MSG_SET_VALUE_TO_SQL_PARAM_FAILED,
- CTR_MSG_XLATOR_DISABLED, CTR_MSG_HARDLINK_MISSING_IN_LIST,
- CTR_MSG_ADD_HARDLINK_TO_LIST_FAILED, CTR_MSG_INIT_LOCK_FAILED,
- CTR_MSG_COPY_FAILED, CTR_MSG_EXTRACT_DB_PARAM_OPTIONS_FAILED,
- CTR_MSG_ADD_HARDLINK_TO_CTR_INODE_CONTEXT_FAILED, CTR_MSG_NULL_LOCAL);
-
-#endif /* !_CTR_MESSAGES_H_ */
diff --git a/xlators/features/changetimerecorder/src/ctr-xlator-ctx.c b/xlators/features/changetimerecorder/src/ctr-xlator-ctx.c
deleted file mode 100644
index b6b66d56731..00000000000
--- a/xlators/features/changetimerecorder/src/ctr-xlator-ctx.c
+++ /dev/null
@@ -1,362 +0,0 @@
-/*
- Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-#include "ctr-xlator-ctx.h"
-#include "ctr-messages.h"
-#include <time.h>
-#include <sys/time.h>
-
-#define IS_THE_ONLY_HARDLINK(ctr_hard_link) \
- (ctr_hard_link->list.next == ctr_hard_link->list.prev)
-
-static void
-fini_ctr_hard_link(ctr_hard_link_t **ctr_hard_link)
-{
- GF_ASSERT(ctr_hard_link);
-
- if (*ctr_hard_link)
- return;
- GF_FREE((*ctr_hard_link)->base_name);
- GF_FREE(*ctr_hard_link);
- *ctr_hard_link = NULL;
-}
-
-/* Please lock the ctr_xlator_ctx before using this function */
-ctr_hard_link_t *
-ctr_search_hard_link_ctx(xlator_t *this, ctr_xlator_ctx_t *ctr_xlator_ctx,
- uuid_t pgfid, const char *base_name)
-{
- ctr_hard_link_t *_hard_link = NULL;
- ctr_hard_link_t *searched_hardlink = NULL;
-
- GF_ASSERT(this);
- GF_ASSERT(ctr_xlator_ctx);
-
- if (pgfid == NULL || base_name == NULL)
- goto out;
-
- /*linear search*/
- list_for_each_entry(_hard_link, &ctr_xlator_ctx->hardlink_list, list)
- {
- if (gf_uuid_compare(_hard_link->pgfid, pgfid) == 0 &&
- _hard_link->base_name &&
- strcmp(_hard_link->base_name, base_name) == 0) {
- searched_hardlink = _hard_link;
- break;
- }
- }
-
-out:
- return searched_hardlink;
-}
-
-/* Please lock the ctr_xlator_ctx before using this function */
-int
-ctr_add_hard_link(xlator_t *this, ctr_xlator_ctx_t *ctr_xlator_ctx,
- uuid_t pgfid, const char *base_name)
-{
- int ret = -1;
- ctr_hard_link_t *ctr_hard_link = NULL;
- struct timeval current_time = {0};
-
- GF_ASSERT(this);
- GF_ASSERT(ctr_xlator_ctx);
-
- if (pgfid == NULL || base_name == NULL)
- goto out;
-
- ctr_hard_link = GF_CALLOC(1, sizeof(*ctr_hard_link), gf_ctr_mt_hard_link_t);
- if (!ctr_hard_link) {
- gf_msg(this->name, GF_LOG_ERROR, ENOMEM, CTR_MSG_CALLOC_FAILED,
- "Failed allocating "
- "ctr_hard_link");
- goto out;
- }
-
- /*Initialize the ctr_hard_link object and
- * Assign the values : parent GFID and basename*/
- INIT_LIST_HEAD(&ctr_hard_link->list);
- gf_uuid_copy(ctr_hard_link->pgfid, pgfid);
- ret = gf_asprintf(&ctr_hard_link->base_name, "%s", base_name);
- if (ret < 0) {
- gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_COPY_FAILED,
- "Failed copying basename"
- "to ctr_hard_link");
- goto error;
- }
-
- ret = gettimeofday(&current_time, NULL);
- if (ret == -1) {
- gf_log(this->name, GF_LOG_ERROR, "Failed to get current time");
- goto error;
- }
-
- /*Add the hard link to the list*/
- list_add_tail(&ctr_hard_link->list, &ctr_xlator_ctx->hardlink_list);
-
- ctr_hard_link->hardlink_heal_period = current_time.tv_sec;
-
- /*aal izz well!*/
- ret = 0;
- goto out;
-error:
- GF_FREE(ctr_hard_link);
-out:
- return ret;
-}
-
-static void
-__delete_hard_link_from_list(ctr_hard_link_t **ctr_hard_link)
-{
- GF_ASSERT(ctr_hard_link);
- GF_ASSERT(*ctr_hard_link);
-
- /*Remove hard link from list*/
- list_del(&(*ctr_hard_link)->list);
- fini_ctr_hard_link(ctr_hard_link);
-}
-
-int
-ctr_delete_hard_link(xlator_t *this, ctr_xlator_ctx_t *ctr_xlator_ctx,
- uuid_t pgfid, const char *base_name)
-{
- int ret = -1;
- ctr_hard_link_t *ctr_hard_link = NULL;
-
- GF_ASSERT(this);
- GF_ASSERT(ctr_xlator_ctx);
-
- LOCK(&ctr_xlator_ctx->lock);
-
- /*Check if the hard link is present */
- ctr_hard_link = ctr_search_hard_link_ctx(this, ctr_xlator_ctx, pgfid,
- base_name);
- if (!ctr_hard_link) {
- gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_HARDLINK_MISSING_IN_LIST,
- "Hard link doesn't exist in the list");
- goto out;
- }
-
- __delete_hard_link_from_list(&ctr_hard_link);
- ctr_hard_link = NULL;
-
- ret = 0;
-out:
- UNLOCK(&ctr_xlator_ctx->lock);
-
- return ret;
-}
-
-int
-ctr_update_hard_link(xlator_t *this, ctr_xlator_ctx_t *ctr_xlator_ctx,
- uuid_t pgfid, const char *base_name, uuid_t old_pgfid,
- const char *old_base_name)
-{
- int ret = -1;
- ctr_hard_link_t *ctr_hard_link = NULL;
- struct timeval current_time = {0};
-
- GF_ASSERT(this);
- GF_ASSERT(ctr_xlator_ctx);
-
- LOCK(&ctr_xlator_ctx->lock);
-
- /*Check if the hard link is present */
- ctr_hard_link = ctr_search_hard_link_ctx(this, ctr_xlator_ctx, old_pgfid,
- old_base_name);
- if (!ctr_hard_link) {
- gf_msg_trace(this->name, 0,
- "Hard link doesn't exist"
- " in the list");
- /* Since the hard link is not present in the list
- * we add it to the list */
- ret = ctr_add_hard_link(this, ctr_xlator_ctx, pgfid, base_name);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0,
- CTR_MSG_ADD_HARDLINK_TO_LIST_FAILED,
- "Failed adding hard link to the list");
- goto out;
- }
- ret = 0;
- goto out;
- }
-
- /* update the hard link */
- gf_uuid_copy(ctr_hard_link->pgfid, pgfid);
- GF_FREE(ctr_hard_link->base_name);
- ret = gf_asprintf(&ctr_hard_link->base_name, "%s", base_name);
- if (ret < 0) {
- gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_COPY_FAILED,
- "Failed copying basename"
- "to ctr_hard_link");
- /* delete the corrupted entry */
- __delete_hard_link_from_list(&ctr_hard_link);
- ctr_hard_link = NULL;
- goto out;
- }
-
- ret = gettimeofday(&current_time, NULL);
- if (ret == -1) {
- gf_log(this->name, GF_LOG_ERROR, "Failed to get current time");
- ctr_hard_link->hardlink_heal_period = 0;
- } else {
- ctr_hard_link->hardlink_heal_period = current_time.tv_sec;
- }
-
- ret = 0;
-
-out:
- UNLOCK(&ctr_xlator_ctx->lock);
-
- return ret;
-}
-
-/* Delete all hardlinks */
-static int
-ctr_delete_all_hard_link(xlator_t *this, ctr_xlator_ctx_t *ctr_xlator_ctx)
-{
- int ret = -1;
- ctr_hard_link_t *ctr_hard_link = NULL;
- ctr_hard_link_t *tmp = NULL;
-
- GF_ASSERT(ctr_xlator_ctx);
-
- LOCK(&ctr_xlator_ctx->lock);
-
- list_for_each_entry_safe(ctr_hard_link, tmp, &ctr_xlator_ctx->hardlink_list,
- list)
- {
- /*Remove hard link from list*/
- __delete_hard_link_from_list(&ctr_hard_link);
- ctr_hard_link = NULL;
- }
-
- UNLOCK(&ctr_xlator_ctx->lock);
-
- ret = 0;
-
- return ret;
-}
-
-/* Please lock the inode before using this function */
-static ctr_xlator_ctx_t *
-__get_ctr_xlator_ctx(xlator_t *this, inode_t *inode)
-{
- int ret = 0;
- uint64_t _addr = 0;
- ctr_xlator_ctx_t *ctr_xlator_ctx = NULL;
-
- GF_ASSERT(this);
- GF_ASSERT(inode);
-
- ret = __inode_ctx_get(inode, this, &_addr);
- if (ret < 0)
- _addr = 0;
- if (_addr != 0) {
- ctr_xlator_ctx = (ctr_xlator_ctx_t *)(long)_addr;
- }
-
- return ctr_xlator_ctx;
-}
-
-ctr_xlator_ctx_t *
-init_ctr_xlator_ctx(xlator_t *this, inode_t *inode)
-{
- int ret = -1;
- uint64_t _addr = 0;
- ctr_xlator_ctx_t *ctr_xlator_ctx = NULL;
- struct timeval current_time = {0};
-
- GF_ASSERT(this);
- GF_ASSERT(inode);
-
- LOCK(&inode->lock);
- {
- ctr_xlator_ctx = __get_ctr_xlator_ctx(this, inode);
- if (ctr_xlator_ctx) {
- ret = 0;
- goto out;
- }
- ctr_xlator_ctx = GF_CALLOC(1, sizeof(*ctr_xlator_ctx),
- gf_ctr_mt_xlator_ctx);
- if (!ctr_xlator_ctx)
- goto out;
-
- ret = LOCK_INIT(&ctr_xlator_ctx->lock);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, ret, CTR_MSG_INIT_LOCK_FAILED,
- "Failed init lock %s", strerror(ret));
- goto out;
- }
- _addr = (uint64_t)(uintptr_t)ctr_xlator_ctx;
-
- ret = __inode_ctx_set(inode, this, &_addr);
- if (ret) {
- goto out;
- }
-
- INIT_LIST_HEAD(&ctr_xlator_ctx->hardlink_list);
-
- ret = gettimeofday(&current_time, NULL);
- if (ret == -1) {
- gf_log(this->name, GF_LOG_ERROR, "Failed to get current time");
- goto out;
- }
-
- ctr_xlator_ctx->inode_heal_period = current_time.tv_sec;
- }
- ret = 0;
-out:
- if (ret) {
- GF_FREE(ctr_xlator_ctx);
- ctr_xlator_ctx = NULL;
- }
-
- UNLOCK(&inode->lock);
-
- return ctr_xlator_ctx;
-}
-
-void
-fini_ctr_xlator_ctx(xlator_t *this, inode_t *inode)
-{
- int ret = 0;
- uint64_t _addr = 0;
- ctr_xlator_ctx_t *ctr_xlator_ctx = NULL;
-
- inode_ctx_del(inode, this, &_addr);
- if (!_addr)
- return;
-
- ctr_xlator_ctx = (ctr_xlator_ctx_t *)(long)_addr;
-
- ret = ctr_delete_all_hard_link(this, ctr_xlator_ctx);
- if (ret) {
- gf_msg(this->name, GF_LOG_WARNING, 0, CTR_MSG_DELETE_HARDLINK_FAILED,
- "Failed deleting all "
- "hard links from inode context");
- }
-
- LOCK_DESTROY(&ctr_xlator_ctx->lock);
-
- GF_FREE(ctr_xlator_ctx);
-}
-
-ctr_xlator_ctx_t *
-get_ctr_xlator_ctx(xlator_t *this, inode_t *inode)
-{
- ctr_xlator_ctx_t *ctr_xlator_ctx = NULL;
-
- LOCK(&inode->lock);
- ctr_xlator_ctx = __get_ctr_xlator_ctx(this, inode);
- UNLOCK(&inode->lock);
-
- return ctr_xlator_ctx;
-}
diff --git a/xlators/features/changetimerecorder/src/ctr-xlator-ctx.h b/xlators/features/changetimerecorder/src/ctr-xlator-ctx.h
deleted file mode 100644
index 584d3b79ba4..00000000000
--- a/xlators/features/changetimerecorder/src/ctr-xlator-ctx.h
+++ /dev/null
@@ -1,68 +0,0 @@
-/*
- Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-#ifndef __CTR_XLATOR_CTX_H
-#define __CTR_XLATOR_CTX_H
-
-#include "xlator.h"
-#include "ctr_mem_types.h"
-#include "iatt.h"
-#include "glusterfs.h"
-#include "xlator.h"
-#include "logging.h"
-#include "locking.h"
-#include "common-utils.h"
-#include <time.h>
-#include <sys/time.h>
-
-typedef struct ctr_hard_link {
- uuid_t pgfid;
- char *base_name;
- /* Hardlink expiry : Defines the expiry period after which a
- * database heal is attempted. */
- uint64_t hardlink_heal_period;
- struct list_head list;
-} ctr_hard_link_t;
-
-typedef struct ctr_xlator_ctx {
- /* This represents the looked up hardlinks
- * NOTE: This doesn't represent all physical hardlinks of the inode*/
- struct list_head hardlink_list;
- uint64_t inode_heal_period;
- gf_lock_t lock;
-} ctr_xlator_ctx_t;
-
-ctr_hard_link_t *
-ctr_search_hard_link_ctx(xlator_t *this, ctr_xlator_ctx_t *ctr_xlator_ctx,
- uuid_t pgfid, const char *base_name);
-
-int
-ctr_add_hard_link(xlator_t *this, ctr_xlator_ctx_t *ctr_xlator_ctx,
- uuid_t pgfid, const char *base_name);
-
-int
-ctr_delete_hard_link(xlator_t *this, ctr_xlator_ctx_t *ctr_xlator_ctx,
- uuid_t pgfid, const char *base_name);
-
-int
-ctr_update_hard_link(xlator_t *this, ctr_xlator_ctx_t *ctr_xlator_ctx,
- uuid_t pgfid, const char *base_name, uuid_t old_pgfid,
- const char *old_base_name);
-
-ctr_xlator_ctx_t *
-get_ctr_xlator_ctx(xlator_t *this, inode_t *inode);
-
-ctr_xlator_ctx_t *
-init_ctr_xlator_ctx(xlator_t *this, inode_t *inode);
-
-void
-fini_ctr_xlator_ctx(xlator_t *this, inode_t *inode);
-
-#endif
diff --git a/xlators/features/changetimerecorder/src/ctr_mem_types.h b/xlators/features/changetimerecorder/src/ctr_mem_types.h
deleted file mode 100644
index 7b8f531ddec..00000000000
--- a/xlators/features/changetimerecorder/src/ctr_mem_types.h
+++ /dev/null
@@ -1,22 +0,0 @@
-/*
- Copyright (c) 2008-2015 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-#ifndef __CTR_MEM_TYPES_H__
-#define __CTR_MEM_TYPES_H__
-
-#include "gfdb_mem-types.h"
-
-enum gf_ctr_mem_types_ {
- gf_ctr_mt_private_t = gfdb_mt_end + 1,
- gf_ctr_mt_xlator_ctx,
- gf_ctr_mt_hard_link_t,
- gf_ctr_mt_end
-};
-#endif
diff --git a/xlators/features/cloudsync/src/Makefile.am b/xlators/features/cloudsync/src/Makefile.am
index 0c3966c968b..e2a277e372b 100644
--- a/xlators/features/cloudsync/src/Makefile.am
+++ b/xlators/features/cloudsync/src/Makefile.am
@@ -21,9 +21,9 @@ cloudsync_la_SOURCES = $(cloudsync_sources) $(cloudsynccommon_sources)
nodist_cloudsync_la_SOURCES = cloudsync-autogen-fops.c cloudsync-autogen-fops.h
BUILT_SOURCES = cloudsync-autogen-fops.h
-cloudsync_la_LDFLAGS = $(LIB_DL) -module $(GF_XLATOR_DEFAULT_LDFLAGS)
+cloudsync_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS)
-cloudsync_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
+cloudsync_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la $(LIB_DL)
AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src -I$(top_srcdir)/rpc/xdr/src -I$(top_builddir)/rpc/xdr/src \
-DCS_PLUGINDIR=\"$(libdir)/glusterfs/$(PACKAGE_VERSION)/cloudsync-plugins\"
diff --git a/xlators/features/cloudsync/src/cloudsync-autogen-fops-tmpl.c b/xlators/features/cloudsync/src/cloudsync-autogen-fops-tmpl.c
index 6bb68cd170c..ee63f983980 100644
--- a/xlators/features/cloudsync/src/cloudsync-autogen-fops-tmpl.c
+++ b/xlators/features/cloudsync/src/cloudsync-autogen-fops-tmpl.c
@@ -20,11 +20,11 @@
#include <dlfcn.h>
-#include "glusterfs.h"
-#include "xlator.h"
-#include "defaults.h"
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/xlator.h>
+#include <glusterfs/defaults.h>
#include "cloudsync.h"
#include "cloudsync-common.h"
-#include "call-stub.h"
+#include <glusterfs/call-stub.h>
#pragma generate
diff --git a/xlators/features/cloudsync/src/cloudsync-autogen-fops-tmpl.h b/xlators/features/cloudsync/src/cloudsync-autogen-fops-tmpl.h
index 2db2a9c88c7..d922c77d8aa 100644
--- a/xlators/features/cloudsync/src/cloudsync-autogen-fops-tmpl.h
+++ b/xlators/features/cloudsync/src/cloudsync-autogen-fops-tmpl.h
@@ -15,7 +15,7 @@
#ifndef _CLOUDSYNC_AUTOGEN_FOPS_H
#define _CLOUDSYNC_AUTOGEN_FOPS_H
-#include "xlator.h"
+#include <glusterfs/xlator.h>
#include "cloudsync.h"
#include "cloudsync-common.h"
diff --git a/xlators/features/cloudsync/src/cloudsync-common.c b/xlators/features/cloudsync/src/cloudsync-common.c
index aee1f06a82a..445a31b90e7 100644
--- a/xlators/features/cloudsync/src/cloudsync-common.c
+++ b/xlators/features/cloudsync/src/cloudsync-common.c
@@ -11,6 +11,20 @@
#include "cloudsync-common.h"
void
+cs_xattrinfo_wipe(cs_local_t *local)
+{
+    /*
+     * Release the location-xattr record attached to a cloudsync local,
+     * together with the file path and volume name strings it owns.
+     *
+     * The pointer is reset once freed so that wiping the same local a
+     * second time is a no-op rather than a double free.
+     */
+    if (!local || !local->xattrinfo.lxattr)
+        return;
+
+    if (local->xattrinfo.lxattr->file_path)
+        GF_FREE(local->xattrinfo.lxattr->file_path);
+
+    if (local->xattrinfo.lxattr->volname)
+        GF_FREE(local->xattrinfo.lxattr->volname);
+
+    GF_FREE(local->xattrinfo.lxattr);
+    local->xattrinfo.lxattr = NULL;
+}
+
+void
cs_local_wipe(xlator_t *this, cs_local_t *local)
{
if (!local)
@@ -40,5 +54,7 @@ cs_local_wipe(xlator_t *this, cs_local_t *local)
if (local->remotepath)
GF_FREE(local->remotepath);
+ cs_xattrinfo_wipe(local);
+
mem_put(local);
}
diff --git a/xlators/features/cloudsync/src/cloudsync-common.h b/xlators/features/cloudsync/src/cloudsync-common.h
index 0be6a446456..11d233460a4 100644
--- a/xlators/features/cloudsync/src/cloudsync-common.h
+++ b/xlators/features/cloudsync/src/cloudsync-common.h
@@ -10,13 +10,27 @@
#ifndef _CLOUDSYNC_COMMON_H
#define _CLOUDSYNC_COMMON_H
-#include "glusterfs.h"
-#include "call-stub.h"
-#include "xlator.h"
-#include "syncop.h"
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/call-stub.h>
+#include <glusterfs/xlator.h>
+#include <glusterfs/syncop.h>
+#include <glusterfs/compat-errno.h>
#include "cloudsync-mem-types.h"
#include "cloudsync-messages.h"
+typedef struct cs_loc_xattr {
+ char *file_path;
+ uuid_t uuid;
+ uuid_t gfid;
+ char *volname;
+} cs_loc_xattr_t;
+
+typedef struct cs_size_xattr {
+ uint64_t size;
+ uint64_t blksize;
+ uint64_t blocks;
+} cs_size_xattr_t;
+
typedef struct cs_local {
loc_t loc;
fd_t *fd;
@@ -34,10 +48,25 @@ typedef struct cs_local {
int call_cnt;
inode_t *inode;
char *remotepath;
+
+ struct {
+ /* offset, flags and size are the information needed
+ * by read fop for remote read operation. These will be
+ * populated in cloudsync read fop, before being passed
+ * on to the plugin performing remote read.
+ */
+ off_t offset;
+ uint32_t flags;
+ size_t size;
+ cs_loc_xattr_t *lxattr;
+ } xattrinfo;
+
} cs_local_t;
typedef int (*fop_download_t)(call_frame_t *frame, void *config);
+typedef int (*fop_remote_read_t)(call_frame_t *, void *);
+
typedef void *(*store_init)(xlator_t *this);
typedef int (*store_reconfigure)(xlator_t *this, dict_t *options);
@@ -48,6 +77,7 @@ struct cs_remote_stores {
char *name; /* store name */
void *config; /* store related information */
fop_download_t dlfop; /* store specific download function */
+ fop_remote_read_t rdfop; /* store specific read function */
store_init init; /* store init to initialize store config */
store_reconfigure reconfigure; /* reconfigure store config */
store_fini fini;
@@ -59,11 +89,15 @@ typedef struct cs_private {
struct cs_remote_stores *stores;
gf_boolean_t abortdl;
pthread_spinlock_t lock;
+ gf_boolean_t remote_read;
} cs_private_t;
void
cs_local_wipe(xlator_t *this, cs_local_t *local);
+void
+cs_xattrinfo_wipe(cs_local_t *local);
+
#define CS_STACK_UNWIND(fop, frame, params...) \
do { \
cs_local_t *__local = NULL; \
@@ -90,6 +124,7 @@ cs_local_wipe(xlator_t *this, cs_local_t *local);
typedef struct store_methods {
int (*fop_download)(call_frame_t *frame, void *config);
+ int (*fop_remote_read)(call_frame_t *, void *);
/* return type should be the store config */
void *(*fop_init)(xlator_t *this);
int (*fop_reconfigure)(xlator_t *this, dict_t *options);
diff --git a/xlators/features/cloudsync/src/cloudsync-fops-c.py b/xlators/features/cloudsync/src/cloudsync-fops-c.py
index 3122bd32c01..c27df97ae58 100755
--- a/xlators/features/cloudsync/src/cloudsync-fops-c.py
+++ b/xlators/features/cloudsync/src/cloudsync-fops-c.py
@@ -35,11 +35,19 @@ cs_@NAME@ (call_frame_t *frame, xlator_t *this,
__cs_inode_ctx_get (this, fd->inode, &ctx);
if (ctx)
- state = __cs_get_file_state (this, fd->inode, ctx);
+ state = __cs_get_file_state (fd->inode, ctx);
else
state = GF_CS_LOCAL;
- local->xattr_req = xdata ? dict_ref (xdata) : (xdata = dict_new ());
+ xdata = xdata ? dict_ref (xdata) : dict_new ();
+
+ if (!xdata) {
+ gf_msg (this->name, GF_LOG_ERROR, 0, 0, "insufficient memory");
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ local->xattr_req = xdata;
ret = dict_set_uint32 (local->xattr_req, GF_CS_OBJECT_STATUS, 1);
if (ret) {
@@ -137,15 +145,15 @@ cs_@NAME@_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
} else {
__cs_inode_ctx_update (this, fd->inode, val);
gf_msg (this->name, GF_LOG_INFO, 0, 0,
- " state = %ld", val);
+ " state = %" PRIu64, val);
if (local->call_cnt == 1 &&
(val == GF_CS_REMOTE ||
val == GF_CS_DOWNLOADING)) {
gf_msg (this->name, GF_LOG_INFO, 0,
0, " will repair and download "
- "the file, current state : %ld",
- val);
+ "the file, current state : %"
+ PRIu64, val);
goto repair;
} else {
gf_msg (this->name, GF_LOG_ERROR, 0, 0,
@@ -187,19 +195,29 @@ int32_t
cs_@NAME@ (call_frame_t *frame, xlator_t *this,
@LONG_ARGS@)
{
+ int op_errno = EINVAL;
cs_local_t *local = NULL;
int ret = 0;
local = cs_local_init (this, frame, loc, NULL, GF_FOP_@UPNAME@);
if (!local) {
gf_msg (this->name, GF_LOG_ERROR, 0, 0, "local is NULL");
+ op_errno = ENOMEM;
goto err;
}
if (loc->inode->ia_type == IA_IFDIR)
goto wind;
- local->xattr_req = xdata ? dict_ref (xdata) : dict_new ();
+ xdata = xdata ? dict_ref (xdata) : dict_new ();
+
+ if (!xdata) {
+ gf_msg (this->name, GF_LOG_ERROR, 0, 0, "insufficient memory");
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ local->xattr_req = xdata;
ret = dict_set_uint32 (local->xattr_req, GF_CS_OBJECT_STATUS, 1);
if (ret) {
@@ -215,7 +233,7 @@ wind:
return 0;
err:
- CS_STACK_UNWIND (@NAME@, frame, -1, errno, @CBK_ERROR_ARGS@);
+ CS_STACK_UNWIND (@NAME@, frame, -1, op_errno, @CBK_ERROR_ARGS@);
return 0;
}
@@ -274,7 +292,7 @@ fd_ops = ['readv', 'writev', 'flush', 'fsync', 'fsyncdir', 'ftruncate',
# These are the current actual lists used to generate the code
# The following list contains fops which are fd based that modifies data
-fd_data_modify_op_fop_template = ['readv', 'writev', 'flush', 'fsync',
+fd_data_modify_op_fop_template = ['writev', 'flush', 'fsync',
'ftruncate', 'rchecksum', 'fallocate',
'discard', 'zerofill', 'seek']
@@ -284,8 +302,8 @@ loc_stat_op_fop_template = ['lookup', 'stat', 'discover', 'access', 'setattr',
'getattr']
# These fops need a separate implementation
-special_fops = ['readdirp', 'statfs', 'setxattr', 'unlink', 'getxattr',
- 'truncate', 'fstat']
+special_fops = ['statfs', 'setxattr', 'unlink', 'getxattr',
+ 'truncate', 'fstat', 'readv', 'readdirp']
def gen_defaults():
for name in ops:
diff --git a/xlators/features/cloudsync/src/cloudsync-mem-types.h b/xlators/features/cloudsync/src/cloudsync-mem-types.h
index 46d4f3aa2a1..220346405d0 100644
--- a/xlators/features/cloudsync/src/cloudsync-mem-types.h
+++ b/xlators/features/cloudsync/src/cloudsync-mem-types.h
@@ -11,11 +11,12 @@
#ifndef __CLOUDSYNC_MEM_TYPES_H__
#define __CLOUDSYNC_MEM_TYPES_H__
-#include "mem-types.h"
+#include <glusterfs/mem-types.h>
enum cs_mem_types_ {
gf_cs_mt_cs_private_t = gf_common_mt_end + 1,
gf_cs_mt_cs_remote_stores_t,
gf_cs_mt_cs_inode_ctx_t,
+ gf_cs_mt_cs_lxattr_t,
gf_cs_mt_end
};
#endif /* __CLOUDSYNC_MEM_TYPES_H__ */
diff --git a/xlators/features/cloudsync/src/cloudsync-plugins/src/Makefile.am b/xlators/features/cloudsync/src/cloudsync-plugins/src/Makefile.am
index 4deefb651eb..fb6b0580c6d 100644
--- a/xlators/features/cloudsync/src/cloudsync-plugins/src/Makefile.am
+++ b/xlators/features/cloudsync/src/cloudsync-plugins/src/Makefile.am
@@ -2,6 +2,10 @@ if BUILD_AMAZONS3_PLUGIN
AMAZONS3_DIR = cloudsyncs3
endif
-SUBDIRS = ${AMAZONS3_DIR}
+if BUILD_CVLT_PLUGIN
+ CVLT_DIR = cvlt
+endif
+
+SUBDIRS = ${AMAZONS3_DIR} ${CVLT_DIR}
CLEANFILES =
diff --git a/xlators/features/cloudsync/src/cloudsync-plugins/src/cloudsyncs3/src/libcloudsyncs3-mem-types.h b/xlators/features/cloudsync/src/cloudsync-plugins/src/cloudsyncs3/src/libcloudsyncs3-mem-types.h
index 0aaab1fe955..7ccfcc9f4b6 100644
--- a/xlators/features/cloudsync/src/cloudsync-plugins/src/cloudsyncs3/src/libcloudsyncs3-mem-types.h
+++ b/xlators/features/cloudsync/src/cloudsync-plugins/src/cloudsyncs3/src/libcloudsyncs3-mem-types.h
@@ -11,7 +11,7 @@
#ifndef __LIBAWS_MEM_TYPES_H__
#define __LIBAWS_MEM_TYPES_H__
-#include "mem-types.h"
+#include <glusterfs/mem-types.h>
enum libaws_mem_types_ {
gf_libaws_mt_aws_private_t = gf_common_mt_end + 1,
gf_libaws_mt_end
diff --git a/xlators/features/cloudsync/src/cloudsync-plugins/src/cloudsyncs3/src/libcloudsyncs3.c b/xlators/features/cloudsync/src/cloudsync-plugins/src/cloudsyncs3/src/libcloudsyncs3.c
index e2bbb680f6c..23c3599825a 100644
--- a/xlators/features/cloudsync/src/cloudsync-plugins/src/cloudsyncs3/src/libcloudsyncs3.c
+++ b/xlators/features/cloudsync/src/cloudsync-plugins/src/cloudsyncs3/src/libcloudsyncs3.c
@@ -15,8 +15,8 @@
#include <openssl/buffer.h>
#include <openssl/crypto.h>
#include <curl/curl.h>
-#include "xlator.h"
-#include "glusterfs.h"
+#include <glusterfs/xlator.h>
+#include <glusterfs/glusterfs.h>
#include "libcloudsyncs3.h"
#include "cloudsync-common.h"
@@ -237,7 +237,7 @@ aws_form_request(char *resource, char **date, char *reqtype, char *bucketid,
int date_len = -1;
int res_len = -1;
- ctime = time(NULL);
+ ctime = gf_time();
gtime = gmtime(&ctime);
date_len = strftime(httpdate, sizeof(httpdate),
diff --git a/xlators/features/cloudsync/src/cloudsync-plugins/src/cloudsyncs3/src/libcloudsyncs3.h b/xlators/features/cloudsync/src/cloudsync-plugins/src/cloudsyncs3/src/libcloudsyncs3.h
index b1a95f8cbf9..85ae669486b 100644
--- a/xlators/features/cloudsync/src/cloudsync-plugins/src/cloudsyncs3/src/libcloudsyncs3.h
+++ b/xlators/features/cloudsync/src/cloudsync-plugins/src/cloudsyncs3/src/libcloudsyncs3.h
@@ -10,10 +10,10 @@
#ifndef _LIBAWS_H
#define _LIBAWS_H
-#include "glusterfs.h"
-#include "call-stub.h"
-#include "xlator.h"
-#include "syncop.h"
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/call-stub.h>
+#include <glusterfs/xlator.h>
+#include <glusterfs/syncop.h>
#include <curl/curl.h>
#include "cloudsync-common.h"
#include "libcloudsyncs3-mem-types.h"
diff --git a/xlators/experimental/dht2/dht2-client/Makefile.am b/xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/Makefile.am
index a985f42a877..a985f42a877 100644
--- a/xlators/experimental/dht2/dht2-client/Makefile.am
+++ b/xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/Makefile.am
diff --git a/xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/Makefile.am b/xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/Makefile.am
new file mode 100644
index 00000000000..b512464f157
--- /dev/null
+++ b/xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/Makefile.am
@@ -0,0 +1,12 @@
+csp_LTLIBRARIES = cloudsynccvlt.la
+cspdir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/cloudsync-plugins
+
+cloudsynccvlt_la_SOURCES = libcvlt.c $(top_srcdir)/xlators/features/cloudsync/src/cloudsync-common.c
+cloudsynccvlt_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
+cloudsynccvlt_la_LDFLAGS = -module -avoid-version -export-symbols $(top_srcdir)/xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/libcloudsynccvlt.sym
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src -I$(top_srcdir)/rpc/xdr/src -I$(top_builddir)/rpc/xdr/src
+noinst_HEADERS = archivestore.h libcvlt.h libcvlt-mem-types.h cvlt-messages.h
+AM_CFLAGS = -Wall -fno-strict-aliasing $(GF_CFLAGS) -I$(top_srcdir)/xlators/features/cloudsync/src
+CLEANFILES =
+
+EXTRA_DIST = libcloudsynccvlt.sym
diff --git a/xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/archivestore.h b/xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/archivestore.h
new file mode 100644
index 00000000000..7230ef77337
--- /dev/null
+++ b/xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/archivestore.h
@@ -0,0 +1,203 @@
+/*
+ Copyright (c) 2018 Commvault Systems, Inc. <http://www.commvault.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __ARCHIVESTORE_H__
+#define __ARCHIVESTORE_H__
+
+#include <stdlib.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <dlfcn.h>
+#include <uuid/uuid.h>
+
+#define CS_XATTR_ARCHIVE_UUID "trusted.cloudsync.uuid"
+#define CS_XATTR_PRODUCT_ID "trusted.cloudsync.product-id"
+#define CS_XATTR_STORE_ID "trusted.cloudsync.store-id"
+
+struct _archstore_methods;
+typedef struct _archstore_methods archstore_methods_t;
+
+struct _archstore_desc {
+ void *priv; /* Private field for store mgmt. */
+ /* To be used only by archive store*/
+};
+typedef struct _archstore_desc archstore_desc_t;
+
+struct _archstore_info {
+ char *id; /* Identifier for the archivestore */
+ uint32_t idlen; /* Length of identifier string */
+ char *prod; /* Name of the data mgmt. product */
+ uint32_t prodlen; /* Length of the product string */
+};
+typedef struct _archstore_info archstore_info_t;
+
+struct _archstore_fileinfo {
+ uuid_t uuid; /* uuid of the file */
+ char *path; /* file path */
+ uint32_t pathlength; /* length of file path */
+};
+typedef struct _archstore_fileinfo archstore_fileinfo_t;
+
+struct _app_callback_info {
+ archstore_info_t *src_archstore;
+ archstore_fileinfo_t *src_archfile;
+ archstore_info_t *dest_archstore;
+ archstore_fileinfo_t *dest_archfile;
+};
+typedef struct _app_callback_info app_callback_info_t;
+
+typedef void (*app_callback_t)(archstore_desc_t *, app_callback_info_t *,
+ void *, int64_t, int32_t);
+
+enum _archstore_scan_type { FULL = 1, INCREMENTAL = 2 };
+typedef enum _archstore_scan_type archstore_scan_type_t;
+
+typedef int32_t archstore_errno_t;
+
+/*
+ * Initialize archive store.
+ * arg1 pointer to structure containing archive store information
+ * arg2 error number if any generated during the initialization
+ * arg3 name of the log file
+ */
+typedef int32_t (*init_archstore_t)(archstore_desc_t *, archstore_errno_t *,
+ const char *);
+
+/*
+ * Clean up archive store.
+ * arg1 pointer to structure containing archive store information
+ * arg2 error number if any generated during the cleanup
+ */
+typedef int32_t (*term_archstore_t)(archstore_desc_t *, archstore_errno_t *);
+
+/*
+ * Read the contents of the file from archive store
+ * arg1 pointer to structure containing archive store description
+ * arg2 pointer to structure containing archive store information
+ * arg3 pointer to structure containing information about file to be read
+ * arg4 offset in the file from which data should be read
+ * arg5 buffer where the data should be read
+ * arg6 number of bytes of data to be read
+ * arg7 error number if any generated during the read from file
+ * arg8 callback handler to be invoked after the data is read
+ * arg9 cookie to be passed when callback is invoked
+ */
+typedef int32_t (*read_archstore_t)(archstore_desc_t *, archstore_info_t *,
+ archstore_fileinfo_t *, off_t, char *,
+ size_t, archstore_errno_t *, app_callback_t,
+ void *);
+
+/*
+ * Restore the contents of the file from archive store
+ * This is basically in-place restore
+ * arg1 pointer to structure containing archive store description
+ * arg2 pointer to structure containing archive store information
+ * arg3 pointer to structure containing information about file to be restored
+ * arg4 error number if any generated during the file restore
+ * arg5 callback to be invoked after the file is restored
+ * arg6 cookie to be passed when callback is invoked
+ */
+typedef int32_t (*recall_archstore_t)(archstore_desc_t *, archstore_info_t *,
+ archstore_fileinfo_t *,
+ archstore_errno_t *, app_callback_t,
+ void *);
+
+/*
+ * Restore the contents of the file from archive store to a different store
+ * This is basically out-of-place restore
+ * arg1 pointer to structure containing archive store description
+ * arg2 pointer to structure containing source archive store information
+ * arg3 pointer to structure containing information about file to be restored
+ * arg4 pointer to structure containing destination archive store information
+ * arg5 pointer to structure containing information about the location to
+ which the file will be restored
+ * arg6 error number if any generated during the file restore
+ * arg7 callback to be invoked after the file is restored
+ * arg8 cookie to be passed when callback is invoked
+ */
+typedef int32_t (*restore_archstore_t)(archstore_desc_t *, archstore_info_t *,
+ archstore_fileinfo_t *,
+ archstore_info_t *,
+ archstore_fileinfo_t *,
+ archstore_errno_t *, app_callback_t,
+ void *);
+
+/*
+ * Archive the contents of the file to archive store
+ * arg1 pointer to structure containing archive store description
+ * arg2 pointer to structure containing source archive store information
+ * arg3 pointer to structure containing information about files to be archived
+ * arg4 pointer to structure containing destination archive store information
+ * arg5 pointer to structure containing information about files that failed
+ * to be archived
+ * arg6 error number if any generated during the file archival
+ * arg7 callback to be invoked after the file is archived
+ * arg8 cookie to be passed when callback is invoked
+ */
+typedef int32_t (*archive_archstore_t)(archstore_desc_t *, archstore_info_t *,
+ archstore_fileinfo_t *,
+ archstore_info_t *,
+ archstore_fileinfo_t *,
+ archstore_errno_t *, app_callback_t,
+ void *);
+
+/*
+ * Backup list of files provided in the input file
+ * arg1 pointer to structure containing archive store description
+ * arg2 pointer to structure containing source archive store information
+ * arg3 pointer to structure containing information about files to be backed up
+ * arg4 pointer to structure containing destination archive store information
+ * arg5 pointer to structure containing information about files that failed
+ * to be backed up
+ * arg6 error number if any generated during the file archival
+ * arg7 callback to be invoked after the file is archived
+ * arg8 cookie to be passed when callback is invoked
+ */
+typedef int32_t (*backup_archstore_t)(archstore_desc_t *, archstore_info_t *,
+ archstore_fileinfo_t *,
+ archstore_info_t *,
+ archstore_fileinfo_t *,
+ archstore_errno_t *, app_callback_t,
+ void *);
+
+/*
+ * Scan the contents of a store and determine the files which need to be
+ * backed up.
+ * arg1 pointer to structure containing archive store description
+ * arg2 pointer to structure containing archive store information
+ * arg3 type of scan whether full or incremental
+ * arg4 path to file that contains list of files to be backed up
+ * arg5 error number if any generated during scan operation
+ */
+typedef int32_t (*scan_archstore_t)(archstore_desc_t *, archstore_info_t *,
+ archstore_scan_type_t, char *,
+ archstore_errno_t *);
+
+struct _archstore_methods {
+ init_archstore_t init;
+ term_archstore_t fini;
+ backup_archstore_t backup;
+ archive_archstore_t archive;
+ scan_archstore_t scan;
+ restore_archstore_t restore;
+ recall_archstore_t recall;
+ read_archstore_t read;
+};
+
+typedef int (*get_archstore_methods_t)(archstore_methods_t *);
+
+/*
+ * Single function that will be invoked by applications for extracting
+ * the function pointers to all data management functions.
+ */
+int32_t
+get_archstore_methods(archstore_methods_t *);
+
+#endif /* End of __ARCHIVESTORE_H__ */
diff --git a/xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/cvlt-messages.h b/xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/cvlt-messages.h
new file mode 100644
index 00000000000..57c9aa77da0
--- /dev/null
+++ b/xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/cvlt-messages.h
@@ -0,0 +1,30 @@
+/*
+ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+ */
+
+#ifndef _CVLT_MESSAGES_H_
+#define _CVLT_MESSAGES_H_
+
+#include <glusterfs/glfs-message-id.h>
+
+/* To add new message IDs, append new identifiers at the end of the list.
+ *
+ * Never remove a message ID. If it's not used anymore, you can rename it or
+ * leave it as it is, but not delete it. This is to prevent reutilization of
+ * IDs by other messages.
+ *
+ * The component name must match one of the entries defined in
+ * glfs-message-id.h.
+ */
+
+GLFS_MSGID(CVLT, CVLT_EXTRACTION_FAILED, CVLT_FREE,
+ CVLT_RESOURCE_ALLOCATION_FAILED, CVLT_RESTORE_FAILED,
+ CVLT_READ_FAILED, CVLT_NO_MEMORY, CVLT_DLOPEN_FAILED);
+
+#endif /* !_CVLT_MESSAGES_H_ */
diff --git a/xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/libcloudsynccvlt.sym b/xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/libcloudsynccvlt.sym
new file mode 100644
index 00000000000..0bc273670d5
--- /dev/null
+++ b/xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/libcloudsynccvlt.sym
@@ -0,0 +1 @@
+store_ops
diff --git a/xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/libcvlt-mem-types.h b/xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/libcvlt-mem-types.h
new file mode 100644
index 00000000000..c24fab8bfe7
--- /dev/null
+++ b/xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/libcvlt-mem-types.h
@@ -0,0 +1,19 @@
+/*
+ * Copyright (c) 2018 Commvault Systems, Inc. <http://www.commvault.com>
+ * This file is part of GlusterFS.
+ *
+ * This file is licensed to you under your choice of the GNU Lesser
+ * General Public License, version 3 or any later version (LGPLv3 or
+ * later), or the GNU General Public License, version 2 (GPLv2), in all
+ * cases as published by the Free Software Foundation.
+ */
+
+#ifndef __LIBCVLT_MEM_TYPES_H__
+#define __LIBCVLT_MEM_TYPES_H__
+
+#include <glusterfs/mem-types.h>
+enum libcvlt_mem_types_ {
+ gf_libcvlt_mt_cvlt_private_t = gf_common_mt_end + 1,
+ gf_libcvlt_mt_end
+};
+#endif /* __LIBCVLT_MEM_TYPES_H__ */
diff --git a/xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/libcvlt.c b/xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/libcvlt.c
new file mode 100644
index 00000000000..5b7272bb448
--- /dev/null
+++ b/xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/libcvlt.c
@@ -0,0 +1,842 @@
+#include <stdlib.h>
+#include <glusterfs/xlator.h>
+#include <glusterfs/glusterfs.h>
+#include "libcvlt.h"
+#include "cloudsync-common.h"
+#include "cvlt-messages.h"
+
+#define LIBARCHIVE_SO "libopenarchive.so"
+#define ALIGN_SIZE 4096
+#define CVLT_TRAILER "cvltv1"
+
+store_methods_t store_ops = {
+ .fop_download = cvlt_download,
+ .fop_init = cvlt_init,
+ .fop_reconfigure = cvlt_reconfigure,
+ .fop_fini = cvlt_fini,
+ .fop_remote_read = cvlt_read,
+};
+
+static const int32_t num_req = 32;
+static const int32_t num_iatt = 32;
+static char *plugin = "cvlt_cloudSync";
+
+int32_t
+mem_acct_init(xlator_t *this)
+{
+    /*
+     * Set up memory accounting for this plugin's allocation types.
+     * Returns -1 when no xlator is supplied, otherwise whatever
+     * xlator_mem_acct_init() reports.
+     */
+    if (!this)
+        return -1;
+
+    return xlator_mem_acct_init(this, gf_libcvlt_mt_end + 1);
+}
+
+static void
+cvlt_free_resources(archive_t *arch)
+{
+    /*
+     * Release everything cvlt_alloc_resources() acquired: the handle of
+     * the dynamically loaded archive library, the iobuf pool and the
+     * request pool.
+     *
+     * Each field is reset to NULL after it is released so that calling
+     * this function again on the same archive_t does not close or
+     * destroy the same resource twice.
+     */
+
+    if (arch->handle) {
+        dlclose(arch->handle);
+        arch->handle = NULL;
+    }
+
+    if (arch->iobuf_pool) {
+        iobuf_pool_destroy(arch->iobuf_pool);
+        arch->iobuf_pool = NULL;
+    }
+
+    if (arch->req_pool) {
+        mem_pool_destroy(arch->req_pool);
+        arch->req_pool = NULL;
+    }
+}
+
+static int32_t
+cvlt_extract_store_fops(xlator_t *this, archive_t *arch)
+{
+    int32_t op_ret = -1;
+    const char *errstr = NULL;
+    /* Named differently from the get_archstore_methods() prototype in
+     * archivestore.h so the local does not shadow it. */
+    get_archstore_methods_t get_methods = NULL;
+
+    /*
+     * libopenarchive.so defines methods for performing data management
+     * operations. The library is loaded here and its method table is
+     * copied into arch->fops.
+     *
+     * Returns 0 on success. On any failure a non-zero value is returned
+     * and arch->handle is left NULL (the library is unloaded again if it
+     * had been opened).
+     */
+
+    VALIDATE_OR_GOTO(arch, err);
+
+    arch->handle = dlopen(LIBARCHIVE_SO, RTLD_NOW);
+    if (!arch->handle) {
+        errstr = dlerror();
+        gf_msg(plugin, GF_LOG_ERROR, 0, CVLT_DLOPEN_FAILED,
+               " failed to open %s : %s", LIBARCHIVE_SO,
+               errstr ? errstr : "unknown error");
+        goto err;
+    }
+
+    dlerror(); /* Clear any existing error */
+
+    get_methods = dlsym(arch->handle, "get_archstore_methods");
+    if (!get_methods) {
+        errstr = dlerror();
+        gf_msg(plugin, GF_LOG_ERROR, 0, CVLT_EXTRACTION_FAILED,
+               " Error extracting get_archstore_methods() : %s",
+               errstr ? errstr : "unknown error");
+        goto unload;
+    }
+
+    op_ret = get_methods(&(arch->fops));
+    if (op_ret) {
+        gf_msg(plugin, GF_LOG_ERROR, 0, CVLT_EXTRACTION_FAILED,
+               " Failed to extract methods in get_archstore_methods");
+        goto unload;
+    }
+
+    return 0;
+
+unload:
+    /* The method table is unusable; do not keep the library mapped. */
+    dlclose(arch->handle);
+    arch->handle = NULL;
+
+err:
+    return op_ret;
+}
+
+static int32_t
+cvlt_alloc_resources(xlator_t *this, archive_t *arch, int num_req, int num_iatt)
+{
+    /*
+     * Allocate the request pool and the iobuf pool, then load the
+     * archive-store method table via cvlt_extract_store_fops().
+     *
+     * Returns 0 on success and -1 on the first failure. Nothing is
+     * released here on failure; whatever was already allocated stays in
+     * arch for the caller to hand to cvlt_free_resources(). For that to
+     * be safe every resource field is reset up front, including
+     * iobuf_pool, which would otherwise be read uninitialised when the
+     * request pool allocation fails.
+     *
+     * num_iatt is not used by this function.
+     */
+    arch->nreqs = 0;
+
+    arch->req_pool = NULL;
+    arch->iobuf_pool = NULL;
+    arch->handle = NULL;
+    arch->xl = this;
+
+    arch->req_pool = mem_pool_new(cvlt_request_t, num_req);
+    if (!arch->req_pool) {
+        goto err;
+    }
+
+    arch->iobuf_pool = iobuf_pool_new();
+    if (!arch->iobuf_pool) {
+        goto err;
+    }
+
+    if (cvlt_extract_store_fops(this, arch)) {
+        goto err;
+    }
+
+    return 0;
+
+err:
+
+    return -1;
+}
+
+static void
+cvlt_req_init(cvlt_request_t *req)
+{
+    /* Process-private semaphore (pshared = 0) starting at count 0. */
+    sem_init(&req->sem, 0, 0);
+}
+
+static void
+cvlt_req_destroy(cvlt_request_t *req)
+{
+    /* Drop the references this request holds on its I/O buffer and on
+     * its iobref, when present, then tear down the semaphore. */
+    if (req->iobuf)
+        iobuf_unref(req->iobuf);
+
+    if (req->iobref)
+        iobref_unref(req->iobref);
+
+    sem_destroy(&req->sem);
+}
+
+static cvlt_request_t *
+cvlt_alloc_req(archive_t *arch)
+{
+    cvlt_request_t *req = NULL;
+
+    /* Without an archive or a request pool there is nothing to hand out. */
+    if (!arch || !arch->req_pool)
+        return NULL;
+
+    req = mem_get0(arch->req_pool);
+    if (!req)
+        return NULL;
+
+    cvlt_req_init(req);
+
+    /* Count the request as outstanding under the archive lock. */
+    LOCK(&(arch->lock));
+    arch->nreqs++;
+    UNLOCK(&(arch->lock));
+
+    return req;
+}
+
+static int32_t
+cvlt_free_req(archive_t *arch, cvlt_request_t *reqptr)
+{
+    /* Both the request and its owning archive are required. */
+    if (!reqptr || !arch)
+        return -1;
+
+    /* With no request pool the request is left untouched and success
+     * is reported. */
+    if (!arch->req_pool)
+        return 0;
+
+    /* Release what the request holds, then return it to its pool. */
+    cvlt_req_destroy(reqptr);
+    mem_put(reqptr);
+
+    /* One fewer outstanding request; updated under the archive lock. */
+    LOCK(&(arch->lock));
+    arch->nreqs--;
+    UNLOCK(&(arch->lock));
+
+    return 0;
+}
+
+static int32_t
+cvlt_init_xlator(xlator_t *this, archive_t *arch, int num_req, int num_iatt)
+{
+    int32_t errnum = -1;
+    int32_t ret = -1;
+
+    /*
+     * Perform all the initializations needed for bringing up the xlator:
+     * create the archive lock, allocate the plugin resources and, once
+     * the store methods have been extracted, initialize the store.
+     * Returns 0 on success and a non-zero value otherwise.
+     */
+    if (!arch)
+        return -1;
+
+    LOCK_INIT(&(arch->lock));
+    LOCK(&(arch->lock));
+
+    ret = cvlt_alloc_resources(this, arch, num_req, num_iatt);
+    if (!ret) {
+        /* Now that the fops have been extracted initialize the store */
+        ret = arch->fops.init(&(arch->descinfo), &errnum, plugin);
+    }
+
+    /* Any failure releases whatever was allocated, still under the lock. */
+    if (ret)
+        cvlt_free_resources(arch);
+
+    UNLOCK(&(arch->lock));
+
+    return ret;
+}
+
+/*
+ * Shut the plugin down: call the store's fini fop, release the plugin
+ * resources (both under arch->lock) and finally free the archive itself.
+ *
+ * Returns -1 when arch is NULL, 0 otherwise.
+ */
+static int32_t
+cvlt_term_xlator(archive_t *arch)
+{
+    int32_t store_errno = -1;
+
+    if (arch == NULL)
+        return -1;
+
+    LOCK(&(arch->lock));
+    /*
+     * Release the resources that have been allocated inside store
+     */
+    arch->fops.fini(&(arch->descinfo), &store_errno);
+    cvlt_free_resources(arch);
+    UNLOCK(&(arch->lock));
+
+    GF_FREE(arch);
+
+    return 0;
+}
+
+/*
+ * Fill store_info with the product id and store id configured on priv,
+ * together with their string lengths.
+ *
+ * Returns 0 on success, -1 when an argument is NULL or when either id has
+ * not been configured (strlen() on a NULL id would be undefined behavior).
+ */
+static int32_t
+cvlt_init_store_info(archive_t *priv, archstore_info_t *store_info)
+{
+    if (!priv || !store_info) {
+        return -1;
+    }
+
+    if (!priv->product_id || !priv->store_id) {
+        return -1;
+    }
+
+    store_info->prod = priv->product_id;
+    store_info->prodlen = strlen(priv->product_id);
+
+    store_info->id = priv->store_id;
+    store_info->idlen = strlen(priv->store_id);
+
+    return 0;
+}
+
+/*
+ * Describe the archived copy of a file: its archive uuid plus the remote
+ * file path and that path's length.
+ *
+ * Returns 0 on success, -1 when an argument is NULL or when the xattr
+ * carries no file path (the path is optional on the xattr, and strlen() on
+ * a NULL path would be undefined behavior).
+ */
+static int32_t
+cvlt_init_file_info(cs_loc_xattr_t *xattr, archstore_fileinfo_t *file_info)
+{
+    if (!xattr || !file_info) {
+        return -1;
+    }
+
+    if (!xattr->file_path) {
+        return -1;
+    }
+
+    gf_uuid_copy(file_info->uuid, xattr->uuid);
+    file_info->path = xattr->file_path;
+    file_info->pathlength = strlen(xattr->file_path);
+
+    return 0;
+}
+
+/*
+ * Describe the gluster side of a transfer: the product is always
+ * "glusterfs" and the store id is the volume name taken from the xattr.
+ *
+ * Returns 0 on success, -1 when either argument is NULL.
+ */
+static int32_t
+cvlt_init_gluster_store_info(cs_loc_xattr_t *xattr,
+                             archstore_info_t *store_info)
+{
+    static char *product = "glusterfs";
+
+    if (xattr == NULL || store_info == NULL)
+        return -1;
+
+    store_info->id = xattr->volname;
+    store_info->idlen = strlen(xattr->volname);
+
+    store_info->prod = product;
+    store_info->prodlen = strlen(product);
+
+    return 0;
+}
+
+/*
+ * Describe the gluster copy of a file: its gfid plus the file path and
+ * that path's length.
+ *
+ * Returns 0 on success, -1 when an argument is NULL or when the xattr
+ * carries no file path (the path is optional on the xattr, and strlen() on
+ * a NULL path would be undefined behavior).
+ */
+static int32_t
+cvlt_init_gluster_file_info(cs_loc_xattr_t *xattr,
+                            archstore_fileinfo_t *file_info)
+{
+    if (!xattr || !file_info) {
+        return -1;
+    }
+
+    if (!xattr->file_path) {
+        return -1;
+    }
+
+    gf_uuid_copy(file_info->uuid, xattr->gfid);
+    file_info->path = xattr->file_path;
+    file_info->pathlength = strlen(xattr->file_path);
+
+    return 0;
+}
+
+/*
+ * If the file was archived then the size reported by the brick will not be
+ * the correct one. Overwrite size, block size and block count in buf with
+ * the values carried in xattrs. Does nothing when either pointer is NULL.
+ */
+static void
+cvlt_copy_stat_info(struct iatt *buf, cs_size_xattr_t *xattrs)
+{
+    if (buf == NULL || xattrs == NULL)
+        return;
+
+    buf->ia_blocks = xattrs->blocks;
+    buf->ia_blksize = xattrs->blksize;
+    buf->ia_size = xattrs->size;
+}
+
+/*
+ * Completion callback passed to the store's read fop by cvlt_read().
+ *
+ * cookie is the cvlt_request_t that was submitted. On success the data
+ * sits in req->iobuf and op_ret is the number of bytes read; the readv
+ * frame is unwound with that buffer and a stat built from the sizes saved
+ * in the request. On any failure the frame is unwound with an empty iovec.
+ * The request is released in all cases.
+ */
+static void
+cvlt_readv_complete(archstore_desc_t *desc, app_callback_info_t *cbkinfo,
+                    void *cookie, int64_t op_ret, int32_t op_errno)
+{
+    /* Zeroed so the failure paths never hand stack garbage to the unwind. */
+    struct iovec iov = {
+        0,
+    };
+    xlator_t *this = NULL;
+    struct iatt postbuf = {
+        0,
+    };
+    call_frame_t *frame = NULL;
+    cvlt_request_t *req = (cvlt_request_t *)cookie;
+    cs_local_t *local = NULL;
+    cs_private_t *cspriv = NULL;
+    archive_t *priv = NULL;
+
+    frame = req->frame;
+    this = frame->this;
+    local = frame->local;
+
+    cspriv = this->private;
+    priv = (archive_t *)cspriv->stores->config;
+
+    if (strcmp(priv->trailer, CVLT_TRAILER)) {
+        op_ret = -1;
+        op_errno = EINVAL;
+        goto out;
+    }
+
+    gf_msg_debug(plugin, 0,
+                 " Read callback invoked offset:%" PRIu64 "bytes: %" PRIu64
+                 " op : %d ret : %" PRId64 " errno : %d",
+                 req->offset, req->bytes, req->op_type, op_ret, op_errno);
+
+    if (op_ret < 0) {
+        goto out;
+    }
+
+    req->iobref = iobref_new();
+    if (!req->iobref) {
+        op_ret = -1;
+        op_errno = ENOMEM;
+        goto out;
+    }
+
+    /*
+     * The iobref must hold the buffer: the request's own reference is
+     * dropped in cvlt_free_req() below, so without this the data would be
+     * freed while upper layers still use it.
+     */
+    if (iobref_add(req->iobref, req->iobuf)) {
+        op_ret = -1;
+        op_errno = ENOMEM;
+        goto out;
+    }
+
+    iov.iov_base = iobuf_ptr(req->iobuf);
+    iov.iov_len = op_ret;
+
+    cvlt_copy_stat_info(&postbuf, &(req->szxattr));
+
+    /*
+     * Hack to notify higher layers of EOF.
+     */
+    if (!postbuf.ia_size || (req->offset + iov.iov_len >= postbuf.ia_size)) {
+        gf_msg_debug(plugin, 0, " signalling end-of-file for uuid=%s",
+                     uuid_utoa(req->file_info.uuid));
+        op_errno = ENOENT;
+    }
+
+out:
+
+    STACK_UNWIND_STRICT(readv, frame, op_ret, op_errno, &iov, 1, &postbuf,
+                        req->iobref, local->xattr_rsp);
+
+    cvlt_free_req(priv, req);
+
+    return;
+}
+
+/*
+ * Completion callback passed to the store's restore fop by
+ * cvlt_download(). cookie is the submitted request; the result is stored
+ * in it and the request's semaphore is posted.
+ */
+static void
+cvlt_download_complete(archstore_desc_t *store, app_callback_info_t *cbk_info,
+                       void *cookie, int64_t ret, int errcode)
+{
+    cvlt_request_t *pending = cookie;
+
+    gf_msg_debug(plugin, 0,
+                 " Download callback invoked ret : %" PRId64 " errno : %d",
+                 ret, errcode);
+
+    /* Publish the result before waking whoever waits on the semaphore. */
+    pending->op_ret = ret;
+    pending->op_errno = errcode;
+    sem_post(&(pending->sem));
+}
+
+/*
+ * Plugin entry point: validate the xlator graph position, allocate the
+ * archive_t private structure, bring up the store and read the
+ * "cloudsync-store-id" / "cloudsync-product-id" options.
+ *
+ * Returns the new archive_t on success, NULL on any failure.
+ */
+void *
+cvlt_init(xlator_t *this)
+{
+    archive_t *priv = NULL;
+
+    if (!this->children || this->children->next) {
+        gf_msg(plugin, GF_LOG_ERROR, ENOMEM, 0,
+               "should have exactly one child");
+        return NULL;
+    }
+
+    if (!this->parents) {
+        gf_msg(plugin, GF_LOG_ERROR, ENOMEM, 0,
+               "dangling volume. check volfile");
+        return NULL;
+    }
+
+    priv = GF_CALLOC(1, sizeof(archive_t), gf_libcvlt_mt_cvlt_private_t);
+    if (!priv) {
+        return NULL;
+    }
+
+    priv->trailer = CVLT_TRAILER;
+    if (cvlt_init_xlator(this, priv, num_req, num_iatt)) {
+        gf_msg(plugin, GF_LOG_ERROR, ENOMEM, 0, "xlator init failed");
+        /*
+         * cvlt_init_xlator() has already released whatever it set up, and
+         * the store fops may never have been loaded. Going through
+         * cvlt_term_xlator() here would call fops.fini and free the
+         * resources a second time, so only the structure itself is freed.
+         */
+        GF_FREE(priv);
+        return NULL;
+    }
+
+    /*
+     * GF_OPTION_INIT jumps to the given label on failure; a dedicated
+     * error label makes sure such a failure is reported to the caller
+     * instead of returning a partially configured archive.
+     */
+    GF_OPTION_INIT("cloudsync-store-id", priv->store_id, str, err);
+    GF_OPTION_INIT("cloudsync-product-id", priv->product_id, str, err);
+
+    gf_msg(plugin, GF_LOG_INFO, 0, 0,
+           "store id is : %s "
+           "product id is : %s.",
+           priv->store_id, priv->product_id);
+
+    return priv;
+
+err:
+    /* The store is fully initialized at this point: tear it down. */
+    cvlt_term_xlator(priv);
+    return (NULL);
+}
+
+/*
+ * Re-read the "cloudsync-store-id" and "cloudsync-product-id" options into
+ * the plugin's archive_t.
+ *
+ * Returns 0 on success, -1 when the stored config does not carry the
+ * expected trailer or when an option cannot be reconfigured.
+ */
+int
+cvlt_reconfigure(xlator_t *this, dict_t *options)
+{
+    cs_private_t *cspriv = this->private;
+    archive_t *priv = (archive_t *)cspriv->stores->config;
+
+    if (strcmp(priv->trailer, CVLT_TRAILER) != 0) {
+        goto out;
+    }
+
+    GF_OPTION_RECONF("cloudsync-store-id", priv->store_id, options, str, out);
+    GF_OPTION_RECONF("cloudsync-product-id", priv->product_id, options, str,
+                     out);
+
+    gf_msg_debug(plugin, 0,
+                 "store id is : %s "
+                 "product id is : %s.",
+                 priv->store_id, priv->product_id);
+    return 0;
+
+out:
+    return -1;
+}
+
+/*
+ * Plugin teardown: release everything held by the archive_t passed as
+ * config. Nothing is done when the config does not carry the expected
+ * trailer.
+ */
+void
+cvlt_fini(void *config)
+{
+    archive_t *priv = config;
+
+    if (strcmp(priv->trailer, CVLT_TRAILER) == 0) {
+        cvlt_term_xlator(priv);
+        gf_msg(plugin, GF_LOG_INFO, 0, CVLT_FREE, " released xlator resources");
+    }
+}
+
+/*
+ * Restore a file from the data management store back into the gluster
+ * volume. The restore is submitted through the store's restore fop and
+ * this function blocks on the request's semaphore until
+ * cvlt_download_complete() reports the outcome.
+ *
+ * frame  - call frame whose local carries the location xattr of the file.
+ * config - the archive_t returned by cvlt_init().
+ *
+ * Returns 0 when the restore succeeded, -1 otherwise.
+ */
+int
+cvlt_download(call_frame_t *frame, void *config)
+{
+    archive_t *parch = NULL;
+    cs_local_t *local = frame->local;
+    cs_loc_xattr_t *locxattr = local->xattrinfo.lxattr;
+    cvlt_request_t *req = NULL;
+    archstore_info_t dest_storeinfo;
+    archstore_fileinfo_t dest_fileinfo;
+    int32_t op_ret = -1;
+    int32_t op_errno = 0;
+    int ret = -1;
+    char uuidbuf[GF_UUID_BUF_SIZE] = {
+        0,
+    };
+
+    parch = (archive_t *)config;
+
+    if (strcmp(parch->trailer, CVLT_TRAILER)) {
+        op_errno = EINVAL;
+        goto out;
+    }
+
+    /*
+     * The archive uuid is a binary uuid_t, so it has to be formatted before
+     * being printed with %s. A separate buffer is used because uuid_utoa()
+     * is already used for the gfid in the same statement.
+     */
+    gf_msg_debug(plugin, 0, " download invoked for uuid = %s gfid=%s ",
+                 uuid_utoa_r(locxattr->uuid, uuidbuf),
+                 uuid_utoa(locxattr->gfid));
+
+    if (!(parch->fops.restore)) {
+        op_errno = ELIBBAD;
+        goto out;
+    }
+
+    /*
+     * Download needs to be processed. Allocate a request.
+     */
+    req = cvlt_alloc_req(parch);
+
+    if (!req) {
+        gf_msg(plugin, GF_LOG_ERROR, ENOMEM, CVLT_RESOURCE_ALLOCATION_FAILED,
+               " failed to allocated request for gfid=%s",
+               uuid_utoa(locxattr->gfid));
+        op_errno = ENOMEM;
+        goto out;
+    }
+
+    /*
+     * Initialize the request object.
+     */
+    req->op_type = CVLT_RESTORE_OP;
+    req->frame = frame;
+
+    /*
+     * The file is currently residing inside a data management store.
+     * To restore the file contents we need to provide the information
+     * about data management store.
+     */
+    op_ret = cvlt_init_store_info(parch, &(req->store_info));
+    if (op_ret < 0) {
+        gf_msg(plugin, GF_LOG_ERROR, 0, CVLT_EXTRACTION_FAILED,
+               " failed to extract store info for gfid=%s",
+               uuid_utoa(locxattr->gfid));
+        goto out;
+    }
+
+    op_ret = cvlt_init_file_info(locxattr, &(req->file_info));
+    if (op_ret < 0) {
+        gf_msg(plugin, GF_LOG_ERROR, 0, CVLT_EXTRACTION_FAILED,
+               " failed to extract file info for gfid=%s",
+               uuid_utoa(locxattr->gfid));
+        goto out;
+    }
+
+    /*
+     * We need to perform in-place restore of the file from data management
+     * store to glusterfs volume.
+     */
+    op_ret = cvlt_init_gluster_store_info(locxattr, &dest_storeinfo);
+    if (op_ret < 0) {
+        gf_msg(plugin, GF_LOG_ERROR, 0, CVLT_EXTRACTION_FAILED,
+               " failed to extract destination store info for gfid=%s",
+               uuid_utoa(locxattr->gfid));
+        goto out;
+    }
+
+    op_ret = cvlt_init_gluster_file_info(locxattr, &dest_fileinfo);
+    if (op_ret < 0) {
+        gf_msg(plugin, GF_LOG_ERROR, 0, CVLT_EXTRACTION_FAILED,
+               " failed to extract file info for gfid=%s",
+               uuid_utoa(locxattr->gfid));
+        goto out;
+    }
+
+    /*
+     * Submit the restore request.
+     */
+    op_ret = parch->fops.restore(&(parch->descinfo), &(req->store_info),
+                                 &(req->file_info), &dest_storeinfo,
+                                 &dest_fileinfo, &op_errno,
+                                 cvlt_download_complete, req);
+    if (op_ret < 0) {
+        gf_msg(plugin, GF_LOG_ERROR, 0, CVLT_RESTORE_FAILED,
+               " failed to restore file gfid=%s from data management store",
+               uuid_utoa(locxattr->gfid));
+        goto out;
+    }
+
+    /*
+     * Wait for the restore to complete.
+     */
+    sem_wait(&(req->sem));
+
+    if (req->op_ret < 0) {
+        gf_msg(plugin, GF_LOG_ERROR, 0, CVLT_RESTORE_FAILED,
+               " restored failed for gfid=%s", uuid_utoa(locxattr->gfid));
+        goto out;
+    }
+
+    ret = 0;
+
+out:
+    /* Single cleanup point for both the success and the failure paths. */
+    if (req) {
+        cvlt_free_req(parch, req);
+    }
+
+    return ret;
+}
+
+/*
+ * Serve a readv from the data management store.
+ *
+ * The requested size and offset are taken from frame->local->xattrinfo.
+ * After validating the request, a cvlt_request_t and a page-aligned iobuf
+ * are allocated and the read is submitted through the store's read fop with
+ * cvlt_readv_complete() as the completion callback.
+ *
+ * frame  - call frame of the readv being served.
+ * config - the archive_t returned by cvlt_init().
+ *
+ * Always returns 0: when the request cannot be submitted the readv frame is
+ * unwound here with op_ret/op_errno describing the failure.
+ */
+int
+cvlt_read(call_frame_t *frame, void *config)
+{
+ int32_t op_ret = -1;
+ int32_t op_errno = 0;
+ archive_t *parch = NULL;
+ cvlt_request_t *req = NULL;
+ struct iovec iov = {
+ 0,
+ };
+ struct iobref *iobref;
+ size_t size = 0;
+ off_t off = 0;
+
+ cs_local_t *local = frame->local;
+ cs_loc_xattr_t *locxattr = local->xattrinfo.lxattr;
+
+ size = local->xattrinfo.size;
+ off = local->xattrinfo.offset;
+
+ parch = (archive_t *)config;
+
+ /* Reject a config that does not carry the expected trailer. */
+ if (strcmp(parch->trailer, CVLT_TRAILER)) {
+ op_ret = -1;
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ gf_msg_debug(plugin, 0,
+ " read invoked for gfid = %s offset = %" PRIu64
+ " file_size = %" PRIu64,
+ uuid_utoa(locxattr->gfid), off, local->stbuf.ia_size);
+
+ if (off >= local->stbuf.ia_size) {
+ /*
+ * Hack to notify higher layers of EOF.
+ */
+
+ op_errno = ENOENT;
+ op_ret = 0;
+
+ gf_msg(plugin, GF_LOG_ERROR, 0, CVLT_READ_FAILED,
+ " reporting end-of-file for gfid=%s", uuid_utoa(locxattr->gfid));
+
+ goto err;
+ }
+
+ if (!size) {
+ op_errno = EINVAL;
+
+ gf_msg(plugin, GF_LOG_ERROR, 0, CVLT_READ_FAILED,
+ " zero size read attempted on gfid=%s",
+ uuid_utoa(locxattr->gfid));
+ goto err;
+ }
+
+ /* The store library must provide a read fop. */
+ if (!(parch->fops.read)) {
+ op_errno = ELIBBAD;
+ goto err;
+ }
+
+ /*
+ * The read request need to be processed. Allocate a request.
+ */
+ req = cvlt_alloc_req(parch);
+
+ if (!req) {
+ gf_msg(plugin, GF_LOG_ERROR, ENOMEM, CVLT_NO_MEMORY,
+ " failed to allocated request for gfid=%s",
+ uuid_utoa(locxattr->gfid));
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ /* Buffer the store reads into; it is released together with the request. */
+ req->iobuf = iobuf_get_page_aligned(parch->iobuf_pool, size, ALIGN_SIZE);
+ if (!req->iobuf) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ /*
+ * Initialize the request object.
+ */
+ req->op_type = CVLT_READ_OP;
+ req->offset = off;
+ req->bytes = size;
+ req->frame = frame;
+ /* Saved so the completion callback can report the file's real size. */
+ req->szxattr.size = local->stbuf.ia_size;
+ req->szxattr.blocks = local->stbuf.ia_blocks;
+ req->szxattr.blksize = local->stbuf.ia_blksize;
+
+ /*
+ * The file is currently residing inside a data management store.
+ * To read the file contents we need to provide the information
+ * about data management store.
+ */
+ op_ret = cvlt_init_store_info(parch, &(req->store_info));
+ if (op_ret < 0) {
+ gf_msg(plugin, GF_LOG_ERROR, 0, CVLT_EXTRACTION_FAILED,
+ " failed to extract store info for gfid=%s"
+ " offset=%" PRIu64 " size=%" GF_PRI_SIZET
+ ", "
+ " buf=%p",
+ uuid_utoa(locxattr->gfid), off, size, req->iobuf->ptr);
+ goto err;
+ }
+
+ op_ret = cvlt_init_file_info(locxattr, &(req->file_info));
+ if (op_ret < 0) {
+ gf_msg(plugin, GF_LOG_ERROR, 0, CVLT_EXTRACTION_FAILED,
+ " failed to extract file info for gfid=%s"
+ " offset=%" PRIu64 " size=%" GF_PRI_SIZET
+ ", "
+ " buf=%p",
+ uuid_utoa(locxattr->gfid), off, size, req->iobuf->ptr);
+ goto err;
+ }
+
+ /*
+ * Submit the read request.
+ */
+ op_ret = parch->fops.read(&(parch->descinfo), &(req->store_info),
+ &(req->file_info), off, req->iobuf->ptr, size,
+ &op_errno, cvlt_readv_complete, req);
+
+ if (op_ret < 0) {
+ gf_msg(plugin, GF_LOG_ERROR, 0, CVLT_EXTRACTION_FAILED,
+ " read failed on gfid=%s"
+ " offset=%" PRIu64 " size=%" GF_PRI_SIZET
+ ", "
+ " buf=%p",
+ uuid_utoa(locxattr->gfid), off, size, req->iobuf->ptr);
+ goto err;
+ }
+
+ /* Submitted: the frame is unwound later by cvlt_readv_complete(). */
+ return 0;
+
+err:
+
+ /*
+ * Nothing was submitted (or the request was rejected up front, including
+ * the EOF case above): unwind here with an empty iovec and the stat kept
+ * in local. iobref may be NULL if iobref_new() fails; it is passed on as is.
+ */
+ iobref = iobref_new();
+ gf_msg_debug(plugin, 0, " read unwinding stack op_ret = %d, op_errno = %d",
+ op_ret, op_errno);
+
+ STACK_UNWIND_STRICT(readv, frame, op_ret, op_errno, &iov, 1,
+ &(local->stbuf), iobref, local->xattr_rsp);
+
+ if (iobref) {
+ iobref_unref(iobref);
+ }
+
+ if (req) {
+ cvlt_free_req(parch, req);
+ }
+
+ return 0;
+}
diff --git a/xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/libcvlt.h b/xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/libcvlt.h
new file mode 100644
index 00000000000..c45ac948f6c
--- /dev/null
+++ b/xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/libcvlt.h
@@ -0,0 +1,84 @@
+/*
+ Copyright (c) 2018 Commvault Systems, Inc. <http://www.commvault.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#ifndef _LIBCVLT_H
+#define _LIBCVLT_H
+
+#include <semaphore.h>
+#include <glusterfs/xlator.h>
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/call-stub.h>
+#include <glusterfs/syncop.h>
+#include <glusterfs/compat-errno.h>
+#include "cloudsync-common.h"
+#include "libcvlt-mem-types.h"
+#include "archivestore.h"
+
+/*
+ * Kind of operation a cvlt_request_t stands for; stored in the request's
+ * op_type field.
+ */
+enum _cvlt_op {
+ CVLT_READ_OP = 1,
+ CVLT_WRITE_OP = 2,
+ CVLT_RESTORE_OP = 3,
+ CVLT_ARCHIVE_OP = 4,
+ CVLT_LOOKUP_OP = 5,
+ CVLT_XATTR_OP = 6,
+ CVLT_STAT_OP = 7,
+ CVLT_FSTAT_op = 8, /* NOTE(review): lower-case "op" differs from siblings */
+ CVLT_UNDEF_OP = 127
+};
+typedef enum _cvlt_op cvlt_op_t;
+
+struct _archive;
+/*
+ * One outstanding operation submitted to the archive store. The request is
+ * handed to the store as the callback cookie.
+ */
+struct _cvlt_request {
+ uint64_t offset; /* file offset of a read */
+ uint64_t bytes; /* number of bytes requested by a read */
+ struct iobuf *iobuf; /* buffer the store reads into */
+ struct iobref *iobref; /* iobref used when unwinding the read */
+ call_frame_t *frame; /* frame of the fop being served */
+ cvlt_op_t op_type; /* kind of operation */
+ int32_t op_ret; /* result stored by the completion callback */
+ int32_t op_errno; /* error code stored by the completion callback */
+ xlator_t *this;
+ sem_t sem; /* posted by the callback when the operation completes */
+ archstore_info_t store_info; /* identifies the data management store */
+ archstore_fileinfo_t file_info; /* identifies the file inside the store */
+ cs_size_xattr_t szxattr; /* size, blocks and block size of the file */
+};
+typedef struct _cvlt_request cvlt_request_t;
+
+/*
+ * Per-plugin private state; this is the "config" pointer returned by
+ * cvlt_init() and passed back to the other entry points.
+ */
+struct _archive {
+ gf_lock_t lock; /* lock for controlling access */
+ xlator_t *xl; /* xlator */
+ void *handle; /* handle returned from dlopen */
+ int32_t nreqs; /* num requests active */
+ struct mem_pool *req_pool; /* pool for requests */
+ struct iobuf_pool *iobuf_pool; /* iobuff pool */
+ archstore_desc_t descinfo; /* Archive store descriptor info */
+ archstore_methods_t fops; /* function pointers */
+ char *product_id; /* value of the "cloudsync-product-id" option */
+ char *store_id; /* value of the "cloudsync-store-id" option */
+ char *trailer; /* marker compared against CVLT_TRAILER to validate config */
+};
+typedef struct _archive archive_t;
+
+/* Create the plugin state; returns an archive_t pointer or NULL on failure. */
+void *
+cvlt_init(xlator_t *);
+
+/* Re-read the store id and product id options; 0 on success, -1 on error. */
+int
+cvlt_reconfigure(xlator_t *, dict_t *);
+
+/* Release the plugin state previously returned by cvlt_init(). */
+void
+cvlt_fini(void *);
+
+/* Restore a file from the store and wait for completion; 0 or -1. */
+int
+cvlt_download(call_frame_t *, void *);
+
+/* Serve a readv from the store; the frame is unwound by the plugin. */
+int
+cvlt_read(call_frame_t *, void *);
+
+#endif
diff --git a/xlators/features/cloudsync/src/cloudsync.c b/xlators/features/cloudsync/src/cloudsync.c
index 918ed786b6b..7f0b9e563b8 100644
--- a/xlators/features/cloudsync/src/cloudsync.c
+++ b/xlators/features/cloudsync/src/cloudsync.c
@@ -8,17 +8,18 @@
* cases as published by the Free Software Foundation.
*/
-#include "glusterfs.h"
-#include "xlator.h"
-#include "defaults.h"
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/xlator.h>
+#include <glusterfs/defaults.h>
#include "cloudsync.h"
#include "cloudsync-common.h"
-#include "call-stub.h"
+#include <glusterfs/call-stub.h>
#include "cloudsync-autogen-fops.h"
+#include <string.h>
#include <dlfcn.h>
-void
+static void
cs_cleanup_private(cs_private_t *priv)
{
if (priv) {
@@ -34,11 +35,15 @@ cs_cleanup_private(cs_private_t *priv)
return;
}
-struct cs_plugin plugins[] = {
+static struct cs_plugin plugins[] = {
{.name = "cloudsyncs3",
.library = "cloudsyncs3.so",
.description = "cloudsync s3 store."},
-
+#if defined(__linux__)
+ {.name = "cvlt",
+ .library = "cloudsynccvlt.so",
+ .description = "Commvault content store."},
+#endif
{.name = NULL},
};
@@ -72,12 +77,14 @@ cs_init(xlator_t *this)
this->private = priv;
+ GF_OPTION_INIT("cloudsync-remote-read", priv->remote_read, bool, out);
+
/* temp workaround. Should be configurable through glusterd*/
per_vol = _gf_true;
if (per_vol) {
- if (dict_get_str(this->options, "cloudsync-storetype", &temp_str) ==
- 0) {
+ if (dict_get_str_sizen(this->options, "cloudsync-storetype",
+ &temp_str) == 0) {
for (index = 0; plugins[index].name; index++) {
if (!strcmp(temp_str, plugins[index].name)) {
libname = plugins[index].library;
@@ -135,6 +142,18 @@ cs_init(xlator_t *this)
(void)dlerror();
+ if (priv->remote_read) {
+ priv->stores->rdfop = store_methods->fop_remote_read;
+ if (!priv->stores->rdfop) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, 0,
+ "failed to get"
+ " read fop %s",
+ dlerror());
+ ret = -1;
+ goto out;
+ }
+ }
+
priv->stores->dlfop = store_methods->fop_download;
if (!priv->stores->dlfop) {
gf_msg(this->name, GF_LOG_ERROR, 0, 0,
@@ -181,8 +200,10 @@ cs_init(xlator_t *this)
out:
if (ret == -1) {
- if (this->local_pool)
+ if (this->local_pool) {
mem_pool_destroy(this->local_pool);
+ this->local_pool = NULL;
+ }
cs_cleanup_private(priv);
@@ -196,6 +217,22 @@ out:
return ret;
}
+/*
+ * forget handler: detach this xlator's context from the inode and free it
+ * if one was stored. Always returns 0.
+ */
+int
+cs_forget(xlator_t *this, inode_t *inode)
+{
+    uint64_t raw_ctx = 0;
+    cs_inode_ctx_t *ctx = NULL;
+
+    inode_ctx_del(inode, this, &raw_ctx);
+    if (raw_ctx != 0) {
+        ctx = (cs_inode_ctx_t *)(uintptr_t)raw_ctx;
+        GF_FREE(ctx);
+    }
+
+    return 0;
+}
+
void
cs_fini(xlator_t *this)
{
@@ -217,6 +254,9 @@ cs_reconfigure(xlator_t *this, dict_t *options)
goto out;
}
+ GF_OPTION_RECONF("cloudsync-remote-read", priv->remote_read, options, bool,
+ out);
+
/* needed only for per volume configuration*/
ret = priv->stores->reconfigure(this, options);
@@ -242,32 +282,6 @@ out:
}
int32_t
-cs_readdirp_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, gf_dirent_t *entries,
- dict_t *xdata)
-{
- gf_dirent_t *tmp = NULL;
- char *sxattr = NULL;
- uint64_t ia_size = 0;
- int ret = 0;
-
- list_for_each_entry(tmp, &entries->list, list)
- {
- ret = dict_get_str(tmp->dict, GF_CS_OBJECT_SIZE, &sxattr);
- if (ret) {
- gf_msg_trace(this->name, 0, "size xattr found");
- continue;
- }
-
- ia_size = atoll(sxattr);
- tmp->d_stat.ia_size = ia_size;
- }
-
- STACK_UNWIND_STRICT(readdirp, frame, op_ret, op_errno, entries, xdata);
- return 0;
-}
-
-int32_t
cs_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
off_t off, dict_t *xdata)
{
@@ -277,16 +291,23 @@ cs_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
if (!xdata) {
xdata = dict_new();
if (!xdata) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, ENOMEM,
+ "failed to create "
+ "dict");
goto err;
}
}
- ret = dict_set_int32(xdata, GF_CS_OBJECT_SIZE, 1);
+ ret = dict_set_uint32(xdata, GF_CS_OBJECT_STATUS, 1);
if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, 0,
+ "dict_set failed key:"
+ " %s",
+ GF_CS_OBJECT_STATUS);
goto err;
}
- STACK_WIND(frame, cs_readdirp_cbk, FIRST_CHILD(this),
+ STACK_WIND(frame, default_readdirp_cbk, FIRST_CHILD(this),
FIRST_CHILD(this)->fops->readdirp, fd, size, off, xdata);
return 0;
err:
@@ -305,7 +326,6 @@ cs_truncate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
local = frame->local;
- /* Do we need lock here? */
local->call_cnt++;
if (op_ret == -1) {
@@ -320,13 +340,13 @@ cs_truncate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
goto unwind;
} else {
__cs_inode_ctx_update(this, local->loc.inode, val);
- gf_msg(this->name, GF_LOG_INFO, 0, 0, " state = %ld", val);
+ gf_msg(this->name, GF_LOG_INFO, 0, 0, " state = %" PRIu64, val);
if (local->call_cnt == 1 &&
(val == GF_CS_REMOTE || val == GF_CS_DOWNLOADING)) {
gf_msg(this->name, GF_LOG_WARNING, 0, 0,
"will repair and download "
- "the file, current state : %ld",
+ "the file, current state : %" PRIu64,
val);
goto repair;
} else {
@@ -368,7 +388,6 @@ int32_t
cs_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset,
dict_t *xdata)
{
- int op_errno = -1;
cs_local_t *local = NULL;
int ret = 0;
cs_inode_ctx_t *ctx = NULL;
@@ -381,14 +400,13 @@ cs_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset,
local = cs_local_init(this, frame, loc, NULL, GF_FOP_TRUNCATE);
if (!local) {
gf_msg(this->name, GF_LOG_ERROR, 0, 0, "local init failed");
- op_errno = ENOMEM;
goto err;
}
__cs_inode_ctx_get(this, loc->inode, &ctx);
if (ctx)
- state = __cs_get_file_state(this, loc->inode, ctx);
+ state = __cs_get_file_state(loc->inode, ctx);
else
state = GF_CS_LOCAL;
@@ -407,7 +425,6 @@ cs_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset,
xdata);
if (!local->stub) {
gf_msg(this->name, GF_LOG_ERROR, 0, 0, "insufficient memory");
- op_errno = ENOMEM;
goto err;
}
@@ -419,14 +436,13 @@ cs_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset,
local->call_cnt++;
ret = locate_and_execute(frame);
if (ret) {
- op_errno = ENOMEM;
goto err;
}
}
return 0;
err:
- CS_STACK_UNWIND(truncate, frame, -1, op_errno, NULL, NULL, NULL);
+ CS_STACK_UNWIND(truncate, frame, -1, ENOMEM, NULL, NULL, NULL);
return 0;
}
@@ -498,7 +514,7 @@ cs_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict,
local->xattr_req = xdata ? dict_ref(xdata) : (xdata = dict_new());
- tmp = dict_get(dict, GF_CS_OBJECT_UPLOAD_COMPLETE);
+ tmp = dict_get_sizen(dict, GF_CS_OBJECT_UPLOAD_COMPLETE);
if (tmp) {
/* Value of key should be the atime */
local->stub = fop_setxattr_stub(frame, cs_resume_setxattr, loc, dict,
@@ -665,7 +681,7 @@ cs_fstat_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
if (op_ret == 0) {
ret = dict_get_uint64(xdata, GF_CS_OBJECT_STATUS, &val);
if (!ret) {
- gf_msg_debug(this->name, 0, "state %ld", val);
+ gf_msg_debug(this->name, 0, "state %" PRIu64, val);
ret = __cs_inode_ctx_update(this, fd->inode, val);
if (ret) {
gf_msg(this->name, GF_LOG_ERROR, 0, 0, "ctx update failed");
@@ -831,7 +847,7 @@ out:
return 0;
}
-void *
+int
cs_download_task(void *arg)
{
call_frame_t *frame = NULL;
@@ -842,7 +858,6 @@ cs_download_task(void *arg)
fd_t *fd = NULL;
cs_local_t *local = NULL;
dict_t *dict = NULL;
- int *retval = NULL;
frame = (call_frame_t *)arg;
@@ -850,13 +865,6 @@ cs_download_task(void *arg)
priv = this->private;
- retval = GF_CALLOC(1, sizeof(int), gf_common_mt_int);
- if (!retval) {
- gf_msg(this->name, GF_LOG_ERROR, 0, 0, "insufficient memory");
- ret = -1;
- goto out;
- }
-
if (!priv->stores) {
gf_msg(this->name, GF_LOG_ERROR, 0, 0,
"No remote store "
@@ -972,20 +980,13 @@ out:
local->dlfd = NULL;
}
- if (retval) {
- *retval = ret;
- pthread_exit(retval);
- } else {
- pthread_exit(&ret);
- }
+ return ret;
}
int
cs_download(call_frame_t *frame)
{
- int *retval = NULL;
int ret = 0;
- pthread_t dthread;
cs_local_t *local = NULL;
xlator_t *this = NULL;
@@ -1000,16 +1001,404 @@ cs_download(call_frame_t *frame)
goto out;
}
- ret = gf_thread_create(&dthread, NULL, &cs_download_task, (void *)frame,
- "downloadthread");
+ ret = cs_download_task((void *)frame);
+out:
+ return ret;
+}
+
+/*
+ * Ask the bricks for the archive uuid xattr by adding
+ * GF_CS_XATTR_ARCHIVE_UUID to the frame's xattr request. The result of the
+ * dict update is deliberately ignored; 0 is always returned.
+ */
+int
+cs_set_xattr_req(call_frame_t *frame)
+{
+    cs_local_t *local = frame->local;
+
+    /* When remote reads are performed (i.e. reads on remote store),
+     * there needs to be a way to associate a file on gluster volume
+     * with its corresponding file on the remote store. In order to do
+     * that, a unique key can be maintained as an xattr
+     * (GF_CS_XATTR_ARCHIVE_UUID) on the stub file on gluster bricks.
+     * This xattr should be provided to the plugin to
+     * perform the read fop on the correct file. This assumes that the file
+     * hierarchy and name need not be the same on remote store as that of
+     * the gluster volume.
+     */
+    (void)dict_set_sizen_str_sizen(local->xattr_req, GF_CS_XATTR_ARCHIVE_UUID,
+                                   "1");
+
+    return 0;
+}
+
+/*
+ * Build local->xattrinfo.lxattr from the stat reply: the file's gfid, the
+ * remote path (when known), the archive uuid found in xdata (cleared when
+ * absent) and the volume name, which is this xlator's name with the
+ * trailing "-cloudsync" removed.
+ *
+ * Returns 0 on success. On failure local->op_ret/op_errno are set, the
+ * xattr info is wiped and -1 is returned.
+ */
+int
+cs_update_xattrs(call_frame_t *frame, dict_t *xdata)
+{
+    cs_local_t *local = NULL;
+    xlator_t *this = NULL;
+    size_t name_len = 0;
+    size_t suffix_len = strlen("-cloudsync");
+    size_t vol_len = 0;
+    GF_UNUSED int ret = 0;
+
+    local = frame->local;
+    this = frame->this;
+
+    local->xattrinfo.lxattr = GF_CALLOC(1, sizeof(cs_loc_xattr_t),
+                                        gf_cs_mt_cs_lxattr_t);
+    if (!local->xattrinfo.lxattr) {
+        local->op_ret = -1;
+        local->op_errno = ENOMEM;
+        goto err;
+    }
+
+    gf_uuid_copy(local->xattrinfo.lxattr->gfid, local->loc.gfid);
+
+    if (local->remotepath) {
+        local->xattrinfo.lxattr->file_path = gf_strdup(local->remotepath);
+        if (!local->xattrinfo.lxattr->file_path) {
+            local->op_ret = -1;
+            local->op_errno = ENOMEM;
+            goto err;
+        }
+    }
- pthread_join(dthread, (void **)&retval);
+    ret = dict_get_gfuuid(xdata, GF_CS_XATTR_ARCHIVE_UUID,
+                          &(local->xattrinfo.lxattr->uuid));
- ret = *retval;
+    if (ret) {
+        gf_uuid_clear(local->xattrinfo.lxattr->uuid);
+    }
+
+    /*
+     * A name shorter than the suffix would make the length computation
+     * wrap around and lead to an out-of-bounds copy below, so reject it.
+     */
+    name_len = strlen(this->name);
+    if (name_len < suffix_len) {
+        local->op_ret = -1;
+        local->op_errno = EINVAL;
+        goto err;
+    }
+
+    vol_len = name_len - suffix_len;
+    local->xattrinfo.lxattr->volname = GF_CALLOC(1, vol_len + 1,
+                                                 gf_common_mt_char);
+    if (!local->xattrinfo.lxattr->volname) {
+        local->op_ret = -1;
+        local->op_errno = ENOMEM;
+        goto err;
+    }
+    memcpy(local->xattrinfo.lxattr->volname, this->name, vol_len);
+    local->xattrinfo.lxattr->volname[vol_len] = '\0';
+
+    return 0;
+err:
+    cs_xattrinfo_wipe(local);
+    return -1;
+}
+
+/*
+ * Hand a readv over to the remote store plugin.
+ *
+ * The requested size, offset and flags are recorded in local->xattrinfo,
+ * an anonymous fd on the inode is created for the duration of the call and
+ * the store's read fop is invoked with the plugin's config.
+ *
+ * Returns 0 when the plugin accepted the read, -1 otherwise.
+ */
+int
+cs_serve_readv(call_frame_t *frame, off_t offset, size_t size, uint32_t flags)
+{
+    xlator_t *this = NULL;
+    cs_private_t *priv = NULL;
+    int ret = -1;
+    fd_t *fd = NULL;
+    cs_local_t *local = NULL;
+
+    local = frame->local;
+    this = frame->this;
+    priv = this->private;
+
+    if (!local->remotepath) {
+        ret = -1;
+        gf_msg(this->name, GF_LOG_ERROR, 0, 0,
+               "remote path not"
+               " available. Check posix logs to resolve");
+        goto out;
+    }
+
+    if (!priv->stores) {
+        gf_msg(this->name, GF_LOG_ERROR, 0, 0,
+               "No remote store "
+               "plugins found");
+        ret = -1;
+        goto out;
+    }
+
+    /*
+     * The read fop is only resolved at init time when remote reads are
+     * enabled; if the option is switched on later through reconfigure the
+     * pointer can still be NULL. Fail the read instead of calling it.
+     */
+    if (!priv->stores->rdfop) {
+        gf_msg(this->name, GF_LOG_ERROR, 0, 0,
+               "remote read fop not available in the store plugin");
+        ret = -1;
+        goto out;
+    }
+
+    if (local->fd) {
+        fd = fd_anonymous(local->fd->inode);
+    } else {
+        fd = fd_anonymous(local->loc.inode);
+    }
+
+    local->xattrinfo.size = size;
+    local->xattrinfo.offset = offset;
+    local->xattrinfo.flags = flags;
+
+    if (!fd) {
+        gf_msg("CS", GF_LOG_ERROR, 0, 0, "fd creation failed");
+        ret = -1;
+        goto out;
+    }
+
+    local->dlfd = fd;
+    local->dloffset = offset;
+
+    /*this calling method is for per volume setting */
+    ret = priv->stores->rdfop(frame, priv->stores->config);
+    if (ret) {
+        gf_msg(this->name, GF_LOG_ERROR, 0, 0,
+               "read failed"
+               ", remotepath: %s",
+               local->remotepath);
+        ret = -1;
+        goto out;
+    } else {
+        gf_msg(this->name, GF_LOG_INFO, 0, 0,
+               "read success, path"
+               " : %s",
+               local->remotepath);
+    }
out:
- if (retval)
- GF_FREE(retval);
+    if (fd) {
+        fd_unref(fd);
+        local->dlfd = NULL;
+    }
+    return ret;
+}
+
+/*
+ * Callback for a readv wound to the child.
+ *
+ * On success the inode is marked GF_CS_LOCAL and the reply is unwound.
+ * On failure the file state is read from GF_CS_OBJECT_STATUS in xdata:
+ * GF_CS_ERROR (or a missing state) unwinds the failure; a REMOTE or
+ * DOWNLOADING state on the first attempt hands the frame to
+ * locate_and_execute(); any later attempt unwinds.
+ */
+int32_t
+cs_readv_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, struct iovec *vector, int32_t count,
+ struct iatt *stbuf, struct iobref *iobref, dict_t *xdata)
+{
+ cs_local_t *local = NULL;
+ int ret = 0;
+ uint64_t val = 0;
+ fd_t *fd = NULL;
+
+ local = frame->local;
+ fd = local->fd;
+
+ /* Counts how many times this readv has come back for the frame. */
+ local->call_cnt++;
+
+ if (op_ret == -1) {
+ ret = dict_get_uint64(xdata, GF_CS_OBJECT_STATUS, &val);
+ if (ret == 0) {
+ if (val == GF_CS_ERROR) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, 0,
+ "could not get file state, unwinding");
+ op_ret = -1;
+ op_errno = EIO;
+ goto unwind;
+ } else {
+ __cs_inode_ctx_update(this, fd->inode, val);
+ gf_msg(this->name, GF_LOG_INFO, 0, 0, " state = %" PRIu64, val);
+
+ /* Only the first failure may trigger the repair path. */
+ if (local->call_cnt == 1 &&
+ (val == GF_CS_REMOTE || val == GF_CS_DOWNLOADING)) {
+ gf_msg(this->name, GF_LOG_INFO, 0, 0,
+ " will read from remote : %" PRIu64, val);
+ goto repair;
+ } else {
+ gf_msg(this->name, GF_LOG_ERROR, 0, 0,
+ "second readv, Unwinding");
+ goto unwind;
+ }
+ }
+ } else {
+ gf_msg(this->name, GF_LOG_ERROR, 0, 0,
+ "file state "
+ "could not be figured, unwinding");
+ goto unwind;
+ }
+ } else {
+ /* successful readv => file is local */
+ __cs_inode_ctx_update(this, fd->inode, GF_CS_LOCAL);
+ gf_msg(this->name, GF_LOG_INFO, 0, 0,
+ "state : GF_CS_LOCAL"
+ ", readv successful");
+
+ goto unwind;
+ }
+
+repair:
+ ret = locate_and_execute(frame);
+ if (ret) {
+ goto unwind;
+ }
+
+ return 0;
+
+unwind:
+ CS_STACK_UNWIND(readv, frame, op_ret, op_errno, vector, count, stbuf,
+ iobref, xdata);
+
+ return 0;
+}
+
+/*
+ * Resume a readv once the post-processing step has run: the inode lock is
+ * released and the read is wound to the child. If post-processing fails
+ * the lock is released and the frame is completed through cs_common_cbk().
+ */
+int32_t
+cs_resume_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+                off_t offset, uint32_t flags, dict_t *xdata)
+{
+    if (cs_resume_postprocess(this, frame, fd->inode) != 0) {
+        cs_inodelk_unlock(frame);
+        cs_common_cbk(frame);
+        return 0;
+    }
+
+    cs_inodelk_unlock(frame);
+
+    STACK_WIND(frame, cs_readv_cbk, FIRST_CHILD(this),
+               FIRST_CHILD(this)->fops->readv, fd, size, offset, flags, xdata);
+
+    return 0;
+}
+
+/*
+ * Resume a readv when remote reads are enabled.
+ *
+ * The inode lock is released first, then the file state decides what
+ * happens: LOCAL winds the read to the child, REMOTE submits it to the
+ * store plugin, GF_CS_ERROR and every other state complete the frame with
+ * an error through cs_common_cbk(). Always returns 0.
+ */
+int32_t
+cs_resume_remote_readv(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ size_t size, off_t offset, uint32_t flags, dict_t *xdata)
+{
+ int ret = 0;
+ cs_local_t *local = NULL;
+ gf_cs_obj_state state = -1;
+ cs_inode_ctx_t *ctx = NULL;
+
+ cs_inodelk_unlock(frame);
+
+ local = frame->local;
+ if (!local) {
+ ret = -1;
+ goto unwind;
+ }
+
+ /* NOTE(review): ctx may stay NULL here; confirm the callee copes. */
+ __cs_inode_ctx_get(this, fd->inode, &ctx);
+
+ state = __cs_get_file_state(fd->inode, ctx);
+ if (state == GF_CS_ERROR) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, 0,
+ "status is GF_CS_ERROR."
+ " Aborting readv");
+ local->op_ret = -1;
+ local->op_errno = EREMOTE;
+ ret = -1;
+ goto unwind;
+ }
+
+ /* Serve readv from remote store only if it is remote. */
+ gf_msg_debug(this->name, 0, "status of file %s is %d",
+ local->remotepath ? local->remotepath : "", state);
+
+ /* We will reach this condition if local inode ctx had REMOTE
+ * state when the control was in cs_readv but after stat
+ * we got an updated state saying that the file is LOCAL.
+ */
+ if (state == GF_CS_LOCAL) {
+ STACK_WIND(frame, cs_readv_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->readv, fd, size, offset, flags,
+ xdata);
+ } else if (state == GF_CS_REMOTE) {
+ ret = cs_resume_remote_readv_postprocess(this, frame, fd->inode, offset,
+ size, flags);
+ /* Failed to submit the remote readv fop to plugin */
+ if (ret) {
+ local->op_ret = -1;
+ local->op_errno = EREMOTE;
+ goto unwind;
+ }
+ /* When the file is in any other intermediate state,
+ * we should not perform remote reads.
+ */
+ } else {
+ local->op_ret = -1;
+ local->op_errno = EINVAL;
+ goto unwind;
+ }
+
+ return 0;
+
+unwind:
+ cs_common_cbk(frame);
+
+ return 0;
+}
+
+/*
+ * readv fop entry point.
+ *
+ * Sets up the frame's local, asks the bricks for the object status through
+ * the xattr request and prepares a stub that resumes the read later
+ * (cs_resume_remote_readv when remote reads are enabled, cs_resume_readv
+ * otherwise). A file believed to be local is read from the child directly;
+ * any other state goes through locate_and_execute(). Failures unwind the
+ * frame with op_errno (ENOMEM). Always returns 0.
+ */
+int32_t
+cs_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, uint32_t flags, dict_t *xdata)
+{
+ int op_errno = ENOMEM;
+ cs_local_t *local = NULL;
+ int ret = 0;
+ cs_inode_ctx_t *ctx = NULL;
+ gf_cs_obj_state state = -1;
+ cs_private_t *priv = NULL;
+
+ VALIDATE_OR_GOTO(frame, err);
+ VALIDATE_OR_GOTO(this, err);
+ VALIDATE_OR_GOTO(fd, err);
+
+ priv = this->private;
+
+ local = cs_local_init(this, frame, NULL, fd, GF_FOP_READ);
+ if (!local) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, 0, "local init failed");
+ goto err;
+ }
+
+ __cs_inode_ctx_get(this, fd->inode, &ctx);
+
+ /* Without a cached context the file is assumed to be local. */
+ if (ctx)
+ state = __cs_get_file_state(fd->inode, ctx);
+ else
+ state = GF_CS_LOCAL;
+
+ /* NOTE(review): if dict_new() fails, xattr_req is NULL below - confirm. */
+ local->xattr_req = xdata ? dict_ref(xdata) : (xdata = dict_new());
+
+ ret = dict_set_uint32(local->xattr_req, GF_CS_OBJECT_STATUS, 1);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, 0,
+ "dict_set failed key:"
+ " %s",
+ GF_CS_OBJECT_STATUS);
+ goto err;
+ }
+
+ /* The stub is what gets resumed once the file state has been checked. */
+ if (priv->remote_read) {
+ local->stub = fop_readv_stub(frame, cs_resume_remote_readv, fd, size,
+ offset, flags, xdata);
+ } else {
+ local->stub = fop_readv_stub(frame, cs_resume_readv, fd, size, offset,
+ flags, xdata);
+ }
+ if (!local->stub) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, 0, "insufficient memory");
+ goto err;
+ }
+
+ if (state == GF_CS_LOCAL) {
+ STACK_WIND(frame, cs_readv_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->readv, fd, size, offset, flags,
+ xdata);
+ } else {
+ local->call_cnt++;
+ ret = locate_and_execute(frame);
+ if (ret) {
+ goto err;
+ }
+ }
+
+ return 0;
+
+err:
+ CS_STACK_UNWIND(readv, frame, -1, op_errno, NULL, -1, NULL, NULL, NULL);
+
+ return 0;
+}
+
+/*
+ * Submit a remote readv to the store plugin through cs_serve_readv() and
+ * return its result. The this and inode arguments are not used here.
+ */
+int
+cs_resume_remote_readv_postprocess(xlator_t *this, call_frame_t *frame,
+ inode_t *inode, off_t offset, size_t size,
+ uint32_t flags)
+{
+ int ret = 0;
+
+ ret = cs_serve_readv(frame, offset, size, flags);
return ret;
}
@@ -1059,7 +1448,7 @@ cs_stat_check_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
goto err;
} else {
ret = __cs_inode_ctx_update(this, inode, val);
- gf_msg_debug(this->name, 0, "status : %lu", val);
+ gf_msg_debug(this->name, 0, "status : %" PRIu64, val);
if (ret) {
gf_msg(this->name, GF_LOG_ERROR, 0, 0, "ctx update failed");
local->op_ret = -1;
@@ -1074,7 +1463,7 @@ cs_stat_check_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
goto err;
}
- ret = dict_get_str(xdata, GF_CS_OBJECT_REMOTE, &filepath);
+ ret = dict_get_str_sizen(xdata, GF_CS_OBJECT_REMOTE, &filepath);
if (filepath) {
gf_msg_debug(this->name, 0, "filepath returned %s", filepath);
local->remotepath = gf_strdup(filepath);
@@ -1087,6 +1476,10 @@ cs_stat_check_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
gf_msg_debug(this->name, 0, "NULL filepath");
}
+ ret = cs_update_xattrs(frame, xdata);
+ if (ret)
+ goto err;
+
local->op_ret = 0;
local->xattr_rsp = dict_ref(xdata);
memcpy(&local->stbuf, stbuf, sizeof(struct iatt));
@@ -1121,6 +1514,8 @@ cs_do_stat_check(call_frame_t *main_frame)
goto err;
}
+ cs_set_xattr_req(main_frame);
+
if (local->fd) {
STACK_WIND(main_frame, cs_stat_check_cbk, FIRST_CHILD(this),
FIRST_CHILD(this)->fops->fstat, local->fd, local->xattr_req);
@@ -1177,6 +1572,10 @@ cs_common_cbk(call_frame_t *frame)
NULL, NULL, NULL);
break;
+ case GF_FOP_TRUNCATE:
+ CS_STACK_UNWIND(truncate, frame, local->op_ret, local->op_errno,
+ NULL, NULL, NULL);
+ break;
default:
break;
}
@@ -1354,7 +1753,7 @@ cs_resume_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
__cs_inode_ctx_get(this, loc->inode, &ctx);
- state = __cs_get_file_state(this, loc->inode, ctx);
+ state = __cs_get_file_state(loc->inode, ctx);
if (state == GF_CS_ERROR) {
/* file is already remote */
@@ -1396,7 +1795,7 @@ unwind:
}
gf_cs_obj_state
-__cs_get_file_state(xlator_t *this, inode_t *inode, cs_inode_ctx_t *ctx)
+__cs_get_file_state(inode_t *inode, cs_inode_ctx_t *ctx)
{
gf_cs_obj_state state = -1;
@@ -1427,7 +1826,7 @@ __cs_inode_ctx_get(xlator_t *this, inode_t *inode, cs_inode_ctx_t **ctx)
if (ret)
*ctx = NULL;
else
- *ctx = (cs_inode_ctx_t *)ctxint;
+ *ctx = (cs_inode_ctx_t *)(uintptr_t)ctxint;
return;
}
@@ -1452,7 +1851,7 @@ __cs_inode_ctx_update(xlator_t *this, inode_t *inode, uint64_t val)
ctx->state = val;
- ctxint = (uint64_t)ctx;
+ ctxint = (uint64_t)(uintptr_t)ctx;
ret = __inode_ctx_set(inode, this, &ctxint);
if (ret) {
@@ -1460,7 +1859,7 @@ __cs_inode_ctx_update(xlator_t *this, inode_t *inode, uint64_t val)
goto out;
}
} else {
- ctx = (cs_inode_ctx_t *)ctxint;
+ ctx = (cs_inode_ctx_t *)(uintptr_t)ctxint;
ctx->state = val;
}
@@ -1483,7 +1882,7 @@ cs_inode_ctx_reset(xlator_t *this, inode_t *inode)
return 0;
}
- ctx = (cs_inode_ctx_t *)ctxint;
+ ctx = (cs_inode_ctx_t *)(uintptr_t)ctxint;
GF_FREE(ctx);
return 0;
@@ -1505,7 +1904,7 @@ cs_resume_postprocess(xlator_t *this, call_frame_t *frame, inode_t *inode)
__cs_inode_ctx_get(this, inode, &ctx);
- state = __cs_get_file_state(this, inode, ctx);
+ state = __cs_get_file_state(inode, ctx);
if (state == GF_CS_ERROR) {
gf_msg(this->name, GF_LOG_ERROR, 0, 0,
"status is GF_CS_ERROR."
@@ -1532,6 +1931,7 @@ cs_resume_postprocess(xlator_t *this, call_frame_t *frame, inode_t *inode)
out:
return ret;
}
+
int32_t
cs_fdctx_to_dict(xlator_t *this, fd_t *fd, dict_t *dict)
{
@@ -1627,7 +2027,9 @@ struct xlator_fops cs_fops = {
.zerofill = cs_zerofill,
};
-struct xlator_cbks cs_cbks = {};
+struct xlator_cbks cs_cbks = {
+ .forget = cs_forget,
+};
struct xlator_dumpops cs_dumpops = {
.fdctx_to_dict = cs_fdctx_to_dict,
@@ -1647,6 +2049,15 @@ struct volume_options cs_options[] = {
{.key = {"cloudsync-storetype"},
.type = GF_OPTION_TYPE_STR,
.description = "Defines which remote store is enabled"},
+ {.key = {"cloudsync-remote-read"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .description = "Defines a remote read fop when on"},
+ {.key = {"cloudsync-store-id"},
+ .type = GF_OPTION_TYPE_STR,
+ .description = "Defines a volume wide store id"},
+ {.key = {"cloudsync-product-id"},
+ .type = GF_OPTION_TYPE_STR,
+ .description = "Defines a volume wide product id"},
{.key = {NULL}},
};
diff --git a/xlators/features/cloudsync/src/cloudsync.h b/xlators/features/cloudsync/src/cloudsync.h
index 7c70c744d2b..d24141978d6 100644
--- a/xlators/features/cloudsync/src/cloudsync.h
+++ b/xlators/features/cloudsync/src/cloudsync.h
@@ -11,14 +11,15 @@
#ifndef __CLOUDSYNC_H__
#define __CLOUDSYNC_H__
-#include "glusterfs.h"
-#include "xlator.h"
-#include "defaults.h"
-#include "syncop.h"
-#include "call-stub.h"
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/xlator.h>
+#include <glusterfs/defaults.h>
+#include <glusterfs/syncop.h>
+#include <glusterfs/call-stub.h>
#include "cloudsync-common.h"
#include "cloudsync-autogen-fops.h"
+#define ALIGN_SIZE 4096
#define CS_LOCK_DOMAIN "cs.protect.file.stat"
typedef struct cs_dlstore {
off_t off;
@@ -29,6 +30,7 @@ typedef struct cs_dlstore {
} cs_dlstore;
typedef struct cs_inode_ctx {
+ cs_loc_xattr_t locxattr;
gf_cs_obj_state state;
} cs_inode_ctx_t;
@@ -85,7 +87,7 @@ void
__cs_inode_ctx_get(xlator_t *this, inode_t *inode, cs_inode_ctx_t **ctx);
gf_cs_obj_state
-__cs_get_file_state(xlator_t *this, inode_t *inode, cs_inode_ctx_t *ctx);
+__cs_get_file_state(inode_t *inode, cs_inode_ctx_t *ctx);
int
cs_inodelk_unlock(call_frame_t *main_frame);
@@ -100,4 +102,22 @@ cs_truncate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int32_t
cs_resume_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc,
off_t offset, dict_t *xattr_req);
+
+int32_t
+cs_readv_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, struct iovec *vector, int32_t count,
+ struct iatt *stbuf, struct iobref *iobref, dict_t *xdata);
+int32_t
+cs_resume_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, uint32_t flags, dict_t *xdata);
+int32_t
+cs_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, uint32_t flags, dict_t *xdata);
+
+int
+cs_resume_remote_readv_postprocess(xlator_t *this, call_frame_t *frame,
+ inode_t *inode, off_t offset, size_t size,
+ uint32_t flags);
+int
+cs_serve_readv(call_frame_t *frame, off_t offset, size_t size, uint32_t flags);
#endif /* __CLOUDSYNC_H__ */
diff --git a/xlators/features/compress/src/cdc-helper.c b/xlators/features/compress/src/cdc-helper.c
index 71f446d51cd..f973ff56cf5 100644
--- a/xlators/features/compress/src/cdc-helper.c
+++ b/xlators/features/compress/src/cdc-helper.c
@@ -8,9 +8,9 @@
cases as published by the Free Software Foundation.
*/
-#include "glusterfs.h"
-#include "logging.h"
-#include "syscall.h"
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/logging.h>
+#include <glusterfs/syscall.h>
#include "cdc.h"
#include "cdc-mem-types.h"
diff --git a/xlators/features/compress/src/cdc-mem-types.h b/xlators/features/compress/src/cdc-mem-types.h
index 56a5a05ee8c..928afdd2efe 100644
--- a/xlators/features/compress/src/cdc-mem-types.h
+++ b/xlators/features/compress/src/cdc-mem-types.h
@@ -11,7 +11,7 @@
#ifndef __CDC_MEM_TYPES_H
#define __CDC_MEM_TYPES_H
-#include "mem-types.h"
+#include <glusterfs/mem-types.h>
enum gf_cdc_mem_types {
gf_cdc_mt_priv_t = gf_common_mt_end + 1,
diff --git a/xlators/features/compress/src/cdc.c b/xlators/features/compress/src/cdc.c
index adace6bcca8..b0b51e914ed 100644
--- a/xlators/features/compress/src/cdc.c
+++ b/xlators/features/compress/src/cdc.c
@@ -10,9 +10,9 @@
#include <sys/uio.h>
-#include "xlator.h"
-#include "defaults.h"
-#include "logging.h"
+#include <glusterfs/xlator.h>
+#include <glusterfs/defaults.h>
+#include <glusterfs/logging.h>
#include "cdc.h"
#include "cdc-mem-types.h"
@@ -334,3 +334,15 @@ struct volume_options options[] = {
"to disk as a gzip file."},
{.key = {NULL}},
};
+
+xlator_api_t xlator_api = {
+ .init = init,
+ .fini = fini,
+ .mem_acct_init = mem_acct_init,
+ .op_version = {GD_OP_VERSION_3_9_0},
+ .fops = &fops,
+ .cbks = &cbks,
+ .options = options,
+ .identifier = "cdc",
+ .category = GF_TECH_PREVIEW,
+};
diff --git a/xlators/features/compress/src/cdc.h b/xlators/features/compress/src/cdc.h
index 764f2028c75..cb87b06a989 100644
--- a/xlators/features/compress/src/cdc.h
+++ b/xlators/features/compress/src/cdc.h
@@ -15,7 +15,7 @@
#include "zlib.h"
#endif
-#include "xlator.h"
+#include <glusterfs/xlator.h>
#ifndef MAX_IOVEC
#define MAX_IOVEC 16
diff --git a/xlators/features/gfid-access/src/gfid-access-mem-types.h b/xlators/features/gfid-access/src/gfid-access-mem-types.h
index ee7fd794da8..1c4d0b93de2 100644
--- a/xlators/features/gfid-access/src/gfid-access-mem-types.h
+++ b/xlators/features/gfid-access/src/gfid-access-mem-types.h
@@ -11,7 +11,7 @@
#ifndef _GFID_ACCESS_MEM_TYPES_H
#define _GFID_ACCESS_MEM_TYPES_H
-#include "mem-types.h"
+#include <glusterfs/mem-types.h>
enum gf_changelog_mem_types {
gf_gfid_access_mt_priv_t = gf_common_mt_end + 1,
diff --git a/xlators/features/gfid-access/src/gfid-access.c b/xlators/features/gfid-access/src/gfid-access.c
index 0cf7870856b..3fea5672a21 100644
--- a/xlators/features/gfid-access/src/gfid-access.c
+++ b/xlators/features/gfid-access/src/gfid-access.c
@@ -8,9 +8,9 @@
cases as published by the Free Software Foundation.
*/
#include "gfid-access.h"
-#include "inode.h"
-#include "byte-order.h"
-#include "statedump.h"
+#include <glusterfs/inode.h>
+#include <glusterfs/byte-order.h>
+#include <glusterfs/statedump.h>
int
ga_valid_inode_loc_copy(loc_t *dst, loc_t *src, xlator_t *this)
@@ -327,10 +327,8 @@ out:
static gf_boolean_t
__is_gfid_access_dir(uuid_t gfid)
{
- uuid_t aux_gfid;
-
- memset(aux_gfid, 0, 16);
- aux_gfid[15] = GF_AUX_GFID;
+ static uuid_t aux_gfid = {0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, GF_AUX_GFID};
if (gf_uuid_compare(gfid, aux_gfid) == 0)
return _gf_true;
@@ -448,14 +446,6 @@ ga_new_entry(call_frame_t *frame, xlator_t *this, loc_t *loc, data_t *data,
0,
};
- args = ga_newfile_parse_args(this, data);
- if (!args)
- goto out;
-
- ret = gf_uuid_parse(args->gfid, gfid);
- if (ret)
- goto out;
-
if (!xdata) {
xdata = dict_new();
} else {
@@ -467,6 +457,14 @@ ga_new_entry(call_frame_t *frame, xlator_t *this, loc_t *loc, data_t *data,
goto out;
}
+ args = ga_newfile_parse_args(this, data);
+ if (!args)
+ goto out;
+
+ ret = gf_uuid_parse(args->gfid, gfid);
+ if (ret)
+ goto out;
+
ret = ga_fill_tmp_loc(loc, this, gfid, args->bname, xdata, &tmp_loc);
if (ret)
goto out;
@@ -1408,3 +1406,15 @@ struct volume_options options[] = {
/* This translator doesn't take any options, or provide any options */
{.key = {NULL}},
};
+
+xlator_api_t xlator_api = {
+ .init = init,
+ .fini = fini,
+ .mem_acct_init = mem_acct_init,
+ .op_version = {1},
+ .fops = &fops,
+ .cbks = &cbks,
+ .options = options,
+ .identifier = "gfid-access",
+ .category = GF_MAINTAINED,
+};
diff --git a/xlators/features/gfid-access/src/gfid-access.h b/xlators/features/gfid-access/src/gfid-access.h
index 68ebe539564..b1e255e56c0 100644
--- a/xlators/features/gfid-access/src/gfid-access.h
+++ b/xlators/features/gfid-access/src/gfid-access.h
@@ -10,11 +10,11 @@
#ifndef __GFID_ACCESS_H__
#define __GFID_ACCESS_H__
-#include "glusterfs.h"
-#include "logging.h"
-#include "dict.h"
-#include "xlator.h"
-#include "defaults.h"
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/logging.h>
+#include <glusterfs/dict.h>
+#include <glusterfs/xlator.h>
+#include <glusterfs/defaults.h>
#include "gfid-access-mem-types.h"
#define UUID_CANONICAL_FORM_LEN 36
diff --git a/xlators/features/glupy/Makefile.am b/xlators/features/glupy/Makefile.am
deleted file mode 100644
index 060429ecf0f..00000000000
--- a/xlators/features/glupy/Makefile.am
+++ /dev/null
@@ -1,3 +0,0 @@
-SUBDIRS = src examples
-
-CLEANFILES =
diff --git a/xlators/features/glupy/doc/README.md b/xlators/features/glupy/doc/README.md
deleted file mode 100644
index 4b8b863ef39..00000000000
--- a/xlators/features/glupy/doc/README.md
+++ /dev/null
@@ -1,44 +0,0 @@
-This is just the very start for a GlusterFS[1] meta-translator that will
-allow translator code to be written in Python. It's based on the standard
-Python embedding (not extending) techniques, plus a dash of the ctypes module.
-The interface is a pretty minimal adaptation of the dispatches and callbacks
-from the C API[2] to Python, as follows:
-
-* Dispatch functions and callbacks must be defined on an "xlator" class
- derived from gluster.Translator so that they'll be auto-registered with
- the C translator during initialization.
-
-* For each dispatch or callback function you want to intercept, you define a
- Python function using the xxx\_fop\_t or xxx\_cbk\_t decorator.
-
-* The arguments for each operation are different, so you'll need to refer to
- the C API. GlusterFS-specific types are used (though only loc\_t is fully
- defined so far) and type correctness is enforced by ctypes.
-
-* If you do intercept a dispatch function, it is your responsibility to call
- xxx\_wind (like STACK\_WIND in the C API but operation-specific) to pass
- the request to the next translator. If you do not intercept a function, it
- will default the same way as for C (pass through to the same operation with
- the same arguments on the first child translator).
-
-* If you intercept a callback function, it is your responsibility to call
- xxx\_unwind (like STACK\_UNWIND\_STRICT in the C API) to pass the request back
- to the caller.
-
-So far only the lookup and create operations are handled this way, to support
-the "negative lookup" example. Now that the basic infrastructure is in place,
-adding more functions should be very quick, though with that much boilerplate I
-might pause to write a code generator. I also plan to add structure
-definitions and interfaces for some of the utility functions in libglusterfs
-(especially those having to do with inode and fd context) in the fairly near
-future. Note that you can also use ctypes to get at anything not explicitly
-exposed to Python already.
-
-_If you're coming here because of the Linux Journal article, please note that
-the code has evolved since that was written. The version that matches the
-article is here:_
-
-https://github.com/jdarcy/glupy/tree/4bbae91ba459ea46ef32f2966562492e4ca9187a
-
-[1] http://www.gluster.org
-[2] http://pl.atyp.us/hekafs.org/dist/xlator_api_2.html
diff --git a/xlators/features/glupy/doc/TESTING b/xlators/features/glupy/doc/TESTING
deleted file mode 100644
index e05f17f498f..00000000000
--- a/xlators/features/glupy/doc/TESTING
+++ /dev/null
@@ -1,9 +0,0 @@
-Loading a translator written in Python using the glupy meta translator
--------------------------------------------------------------------------------
-'test.vol' is a simple volfile with the debug-trace Python translator on top
-of a brick. The volfile can be mounted using the following command.
-
-$ glusterfs --debug -f test.vol /path/to/mntpt
-
-If then file operations are performed on the newly mounted file system, log
-output would be printed by the Python translator on the standard output.
diff --git a/xlators/features/glupy/doc/test.vol b/xlators/features/glupy/doc/test.vol
deleted file mode 100644
index 0751a488c1f..00000000000
--- a/xlators/features/glupy/doc/test.vol
+++ /dev/null
@@ -1,10 +0,0 @@
-volume vol-posix
- type storage/posix
- option directory /path/to/brick
-end-volume
-
-volume vol-glupy
- type features/glupy
- option module-name debug-trace
- subvolumes vol-posix
-end-volume
diff --git a/xlators/features/glupy/examples/Makefile.am b/xlators/features/glupy/examples/Makefile.am
deleted file mode 100644
index c26abeaafb6..00000000000
--- a/xlators/features/glupy/examples/Makefile.am
+++ /dev/null
@@ -1,5 +0,0 @@
-xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features
-
-glupyexamplesdir = $(xlatordir)/glupy
-
-glupyexamples_PYTHON = negative.py helloworld.py debug-trace.py
diff --git a/xlators/features/glupy/examples/debug-trace.py b/xlators/features/glupy/examples/debug-trace.py
deleted file mode 100644
index 6e012f6c547..00000000000
--- a/xlators/features/glupy/examples/debug-trace.py
+++ /dev/null
@@ -1,777 +0,0 @@
-
-from __future__ import print_function
-import sys
-import stat
-from uuid import UUID
-from time import strftime, localtime
-from gluster.glupy import *
-
-# This translator was written primarily to test the fop entry point definitions
-# and structure definitions in 'glupy.py'.
-
-# It is similar to the C language debug-trace translator, which logs the
-# arguments passed to the fops and their corresponding cbk functions.
-
-dl.get_id.restype = c_long
-dl.get_id.argtypes = [ POINTER(call_frame_t) ]
-
-dl.get_rootunique.restype = c_uint64
-dl.get_rootunique.argtypes = [ POINTER(call_frame_t) ]
-
-def uuid2str (gfid):
- return str(UUID(''.join(map("{0:02x}".format, gfid))))
-
-
-def st_mode_from_ia (prot, filetype):
- st_mode = 0
- type_bit = 0
- prot_bit = 0
-
- if filetype == IA_IFREG:
- type_bit = stat.S_IFREG
- elif filetype == IA_IFDIR:
- type_bit = stat.S_IFDIR
- elif filetype == IA_IFLNK:
- type_bit = stat.S_IFLNK
- elif filetype == IA_IFBLK:
- type_bit = stat.S_IFBLK
- elif filetype == IA_IFCHR:
- type_bit = stat.S_IFCHR
- elif filetype == IA_IFIFO:
- type_bit = stat.S_IFIFO
- elif filetype == IA_IFSOCK:
- type_bit = stat.S_IFSOCK
- elif filetype == IA_INVAL:
- pass
-
-
- if prot.suid:
- prot_bit |= stat.S_ISUID
- if prot.sgid:
- prot_bit |= stat.S_ISGID
- if prot.sticky:
- prot_bit |= stat.S_ISVTX
-
- if prot.owner.read:
- prot_bit |= stat.S_IRUSR
- if prot.owner.write:
- prot_bit |= stat.S_IWUSR
- if prot.owner.execn:
- prot_bit |= stat.S_IXUSR
-
- if prot.group.read:
- prot_bit |= stat.S_IRGRP
- if prot.group.write:
- prot_bit |= stat.S_IWGRP
- if prot.group.execn:
- prot_bit |= stat.S_IXGRP
-
- if prot.other.read:
- prot_bit |= stat.S_IROTH
- if prot.other.write:
- prot_bit |= stat.S_IWOTH
- if prot.other.execn:
- prot_bit |= stat.S_IXOTH
-
- st_mode = (type_bit | prot_bit)
-
- return st_mode
-
-
-def trace_stat2str (buf):
- gfid = uuid2str(buf.contents.ia_gfid)
- mode = st_mode_from_ia(buf.contents.ia_prot, buf.contents.ia_type)
- atime_buf = strftime("[%b %d %H:%M:%S]",
- localtime(buf.contents.ia_atime))
- mtime_buf = strftime("[%b %d %H:%M:%S]",
- localtime(buf.contents.ia_mtime))
- ctime_buf = strftime("[%b %d %H:%M:%S]",
- localtime(buf.contents.ia_ctime))
- return ("(gfid={0:s}, ino={1:d}, mode={2:o}, nlink={3:d}, uid ={4:d}, "+
- "gid ={5:d}, size={6:d}, blocks={7:d}, atime={8:s}, mtime={9:s}, "+
- "ctime={10:s})").format(gfid, buf.contents.ia_no, mode,
- buf.contents.ia_nlink,
- buf.contents.ia_uid,
- buf.contents.ia_gid,
- buf.contents.ia_size,
- buf.contents.ia_blocks,
- atime_buf, mtime_buf,
- ctime_buf)
-
-class xlator(Translator):
-
- def __init__(self, c_this):
- Translator.__init__(self, c_this)
- self.gfids = {}
-
- def lookup_fop(self, frame, this, loc, xdata):
- unique = dl.get_rootunique(frame)
- key = dl.get_id(frame)
- gfid = uuid2str(loc.contents.gfid)
- print(("GLUPY TRACE LOOKUP FOP- {0:d}: gfid={1:s}; " +
- "path={2:s}").format(unique, gfid, loc.contents.path))
- self.gfids[key] = gfid
- dl.wind_lookup(frame, POINTER(xlator_t)(), loc, xdata)
- return 0
-
- def lookup_cbk(self, frame, cookie, this, op_ret, op_errno,
- inode, buf, xdata, postparent):
- unique =dl.get_rootunique(frame)
- key =dl.get_id(frame)
- if op_ret == 0:
- gfid = uuid2str(buf.contents.ia_gfid)
- statstr = trace_stat2str(buf)
- postparentstr = trace_stat2str(postparent)
- print(("GLUPY TRACE LOOKUP CBK- {0:d}: gfid={1:s}; "+
- "op_ret={2:d}; *buf={3:s}; " +
- "*postparent={4:s}").format(unique, gfid,
- op_ret, statstr,
- postparentstr))
- else:
- gfid = self.gfids[key]
- print(("GLUPY TRACE LOOKUP CBK - {0:d}: gfid={1:s};" +
- " op_ret={2:d}; op_errno={3:d}").format(unique,
- gfid,
- op_ret,
- op_errno))
- del self.gfids[key]
- dl.unwind_lookup(frame, cookie, this, op_ret, op_errno,
- inode, buf, xdata, postparent)
- return 0
-
- def create_fop(self, frame, this, loc, flags, mode, umask, fd,
- xdata):
- unique = dl.get_rootunique(frame)
- gfid = uuid2str(loc.contents.gfid)
- print(("GLUPY TRACE CREATE FOP- {0:d}: gfid={1:s}; path={2:s}; " +
- "fd={3:s}; flags=0{4:o}; mode=0{5:o}; " +
- "umask=0{6:o}").format(unique, gfid, loc.contents.path,
- fd, flags, mode, umask))
- dl.wind_create(frame, POINTER(xlator_t)(), loc, flags, mode,
- umask, fd, xdata)
- return 0
-
- def create_cbk(self, frame, cookie, this, op_ret, op_errno, fd,
- inode, buf, preparent, postparent, xdata):
- unique = dl.get_rootunique(frame)
- if op_ret >= 0:
- gfid = uuid2str(inode.contents.gfid)
- statstr = trace_stat2str(buf)
- preparentstr = trace_stat2str(preparent)
- postparentstr = trace_stat2str(postparent)
- print(("GLUPY TRACE CREATE CBK- {0:d}: gfid={1:s};" +
- " op_ret={2:d}; fd={3:s}; *stbuf={4:s}; " +
- "*preparent={5:s};" +
- " *postparent={6:s}").format(unique, gfid, op_ret,
- fd, statstr,
- preparentstr,
- postparentstr))
- else:
- print(("GLUPY TRACE CREATE CBK- {0:d}: op_ret={1:d}; " +
- "op_errno={2:d}").format(unique, op_ret, op_errno))
- dl.unwind_create(frame, cookie, this, op_ret, op_errno, fd,
- inode, buf, preparent, postparent, xdata)
- return 0
-
- def open_fop(self, frame, this, loc, flags, fd, xdata):
- unique = dl.get_rootunique(frame)
- key = dl.get_id(frame)
- gfid = uuid2str(loc.contents.inode.contents.gfid)
- print(("GLUPY TRACE OPEN FOP- {0:d}: gfid={1:s}; path={2:s}; "+
- "flags={3:d}; fd={4:s}").format(unique, gfid,
- loc.contents.path, flags,
- fd))
- self.gfids[key] = gfid
- dl.wind_open(frame, POINTER(xlator_t)(), loc, flags, fd, xdata)
- return 0
-
- def open_cbk(self, frame, cookie, this, op_ret, op_errno, fd, xdata):
- unique = dl.get_rootunique(frame)
- key = dl.get_id(frame)
- gfid = self.gfids[key]
- print(("GLUPY TRACE OPEN CBK- {0:d}: gfid={1:s}; op_ret={2:d}; "
- "op_errno={3:d}; *fd={4:s}").format(unique, gfid,
- op_ret, op_errno, fd))
- del self.gfids[key]
- dl.unwind_open(frame, cookie, this, op_ret, op_errno, fd,
- xdata)
- return 0
-
- def readv_fop(self, frame, this, fd, size, offset, flags, xdata):
- unique = dl.get_rootunique(frame)
- key = dl.get_id(frame)
- gfid = uuid2str(fd.contents.inode.contents.gfid)
- print(("GLUPY TRACE READV FOP- {0:d}: gfid={1:s}; "+
- "fd={2:s}; size ={3:d}; offset={4:d}; " +
- "flags=0{5:x}").format(unique, gfid, fd, size, offset,
- flags))
- self.gfids[key] = gfid
- dl.wind_readv (frame, POINTER(xlator_t)(), fd, size, offset,
- flags, xdata)
- return 0
-
- def readv_cbk(self, frame, cookie, this, op_ret, op_errno, vector,
- count, buf, iobref, xdata):
- unique = dl.get_rootunique(frame)
- key = dl.get_id(frame)
- gfid = self.gfids[key]
- if op_ret >= 0:
- statstr = trace_stat2str(buf)
- print(("GLUPY TRACE READV CBK- {0:d}: gfid={1:s}, "+
- "op_ret={2:d}; *buf={3:s};").format(unique, gfid,
- op_ret,
- statstr))
-
- else:
- print(("GLUPY TRACE READV CBK- {0:d}: gfid={1:s}, "+
- "op_ret={2:d}; op_errno={3:d}").format(unique,
- gfid,
- op_ret,
- op_errno))
- del self.gfids[key]
- dl.unwind_readv (frame, cookie, this, op_ret, op_errno,
- vector, count, buf, iobref, xdata)
- return 0
-
- def writev_fop(self, frame, this, fd, vector, count, offset, flags,
- iobref, xdata):
- unique = dl.get_rootunique(frame)
- key = dl.get_id(frame)
- gfid = uuid2str(fd.contents.inode.contents.gfid)
- print(("GLUPY TRACE WRITEV FOP- {0:d}: gfid={1:s}; " +
- "fd={2:s}; count={3:d}; offset={4:d}; " +
- "flags=0{5:x}").format(unique, gfid, fd, count, offset,
- flags))
- self.gfids[key] = gfid
- dl.wind_writev(frame, POINTER(xlator_t)(), fd, vector, count,
- offset, flags, iobref, xdata)
- return 0
-
- def writev_cbk(self, frame, cookie, this, op_ret, op_errno, prebuf,
- postbuf, xdata):
- unique = dl.get_rootunique(frame)
- key = dl.get_id(frame)
- if op_ret >= 0:
- preopstr = trace_stat2str(prebuf)
- postopstr = trace_stat2str(postbuf)
- print(("GLUPY TRACE WRITEV CBK- {0:d}: op_ret={1:d}; " +
- "*prebuf={2:s}; " +
- "*postbuf={3:s}").format(unique, op_ret, preopstr,
- postopstr))
- else:
- gfid = self.gfids[key]
- print(("GLUPY TRACE WRITEV CBK- {0:d}: gfid={1:s}; "+
- "op_ret={2:d}; op_errno={3:d}").format(unique,
- gfid,
- op_ret,
- op_errno))
- del self.gfids[key]
- dl.unwind_writev (frame, cookie, this, op_ret, op_errno,
- prebuf, postbuf, xdata)
- return 0
-
- def opendir_fop(self, frame, this, loc, fd, xdata):
- unique = dl.get_rootunique(frame)
- key = dl.get_id(frame)
- gfid = uuid2str(loc.contents.inode.contents.gfid)
- print(("GLUPY TRACE OPENDIR FOP- {0:d}: gfid={1:s}; path={2:s}; "+
- "fd={3:s}").format(unique, gfid, loc.contents.path, fd))
- self.gfids[key] = gfid
- dl.wind_opendir(frame, POINTER(xlator_t)(), loc, fd, xdata)
- return 0
-
- def opendir_cbk(self, frame, cookie, this, op_ret, op_errno, fd,
- xdata):
- unique = dl.get_rootunique(frame)
- key = dl.get_id(frame)
- gfid = self.gfids[key]
- print(("GLUPY TRACE OPENDIR CBK- {0:d}: gfid={1:s}; op_ret={2:d};"+
- " op_errno={3:d}; fd={4:s}").format(unique, gfid, op_ret,
- op_errno, fd))
- del self.gfids[key]
- dl.unwind_opendir(frame, cookie, this, op_ret, op_errno,
- fd, xdata)
- return 0
-
- def readdir_fop(self, frame, this, fd, size, offset, xdata):
- unique = dl.get_rootunique(frame)
- key = dl.get_id(frame)
- gfid = uuid2str(fd.contents.inode.contents.gfid)
- print(("GLUPY TRACE READDIR FOP- {0:d}: gfid={1:s}; fd={2:s}; " +
- "size={3:d}; offset={4:d}").format(unique, gfid, fd, size,
- offset))
- self.gfids[key] = gfid
- dl.wind_readdir(frame, POINTER(xlator_t)(), fd, size, offset,
- xdata)
- return 0
-
- def readdir_cbk(self, frame, cookie, this, op_ret, op_errno, buf,
- xdata):
- unique = dl.get_rootunique(frame)
- key = dl.get_id(frame)
- gfid = self.gfids[key]
- print(("GLUPY TRACE READDIR CBK- {0:d}: gfid={1:s}; op_ret={2:d};"+
- " op_errno={3:d}").format(unique, gfid, op_ret, op_errno))
- del self.gfids[key]
- dl.unwind_readdir(frame, cookie, this, op_ret, op_errno, buf,
- xdata)
- return 0
-
- def readdirp_fop(self, frame, this, fd, size, offset, dictionary):
- unique = dl.get_rootunique(frame)
- key = dl.get_id(frame)
- gfid = uuid2str(fd.contents.inode.contents.gfid)
- print(("GLUPY TRACE READDIRP FOP- {0:d}: gfid={1:s}; fd={2:s}; "+
- " size={3:d}; offset={4:d}").format(unique, gfid, fd, size,
- offset))
- self.gfids[key] = gfid
- dl.wind_readdirp(frame, POINTER(xlator_t)(), fd, size, offset,
- dictionary)
- return 0
-
- def readdirp_cbk(self, frame, cookie, this, op_ret, op_errno, buf,
- xdata):
- unique = dl.get_rootunique(frame)
- key = dl.get_id(frame)
- gfid = self.gfids[key]
- print(("GLUPY TRACE READDIRP CBK- {0:d}: gfid={1:s}; "+
- "op_ret={2:d}; op_errno={3:d}").format(unique, gfid,
- op_ret, op_errno))
- del self.gfids[key]
- dl.unwind_readdirp(frame, cookie, this, op_ret, op_errno, buf,
- xdata)
- return 0
-
- def mkdir_fop(self, frame, this, loc, mode, umask, xdata):
- unique = dl.get_rootunique(frame)
- gfid = uuid2str(loc.contents.inode.contents.gfid)
- print(("GLUPY TRACE MKDIR FOP- {0:d}: gfid={1:s}; path={2:s}; " +
- "mode={3:d}; umask=0{4:o}").format(unique, gfid,
- loc.contents.path, mode,
- umask))
- dl.wind_mkdir(frame, POINTER(xlator_t)(), loc, mode, umask,
- xdata)
- return 0
-
- def mkdir_cbk(self, frame, cookie, this, op_ret, op_errno, inode, buf,
- preparent, postparent, xdata):
- unique = dl.get_rootunique(frame)
- if op_ret == 0:
- gfid = uuid2str(inode.contents.gfid)
- statstr = trace_stat2str(buf)
- preparentstr = trace_stat2str(preparent)
- postparentstr = trace_stat2str(postparent)
- print(("GLUPY TRACE MKDIR CBK- {0:d}: gfid={1:s}; "+
- "op_ret={2:d}; *stbuf={3:s}; *prebuf={4:s}; "+
- "*postbuf={5:s} ").format(unique, gfid, op_ret,
- statstr,
- preparentstr,
- postparentstr))
- else:
- print(("GLUPY TRACE MKDIR CBK- {0:d}: op_ret={1:d}; "+
- "op_errno={2:d}").format(unique, op_ret, op_errno))
- dl.unwind_mkdir(frame, cookie, this, op_ret, op_errno, inode,
- buf, preparent, postparent, xdata)
- return 0
-
- def rmdir_fop(self, frame, this, loc, flags, xdata):
- unique = dl.get_rootunique(frame)
- key = dl.get_id(frame)
- gfid = uuid2str(loc.contents.inode.contents.gfid)
- print(("GLUPY TRACE RMDIR FOP- {0:d}: gfid={1:s}; path={2:s}; "+
- "flags={3:d}").format(unique, gfid, loc.contents.path,
- flags))
- self.gfids[key] = gfid
- dl.wind_rmdir(frame, POINTER(xlator_t)(), loc, flags, xdata)
- return 0
-
- def rmdir_cbk(self, frame, cookie, this, op_ret, op_errno, preparent,
- postparent, xdata):
- unique = dl.get_rootunique(frame)
- key = dl.get_id(frame)
- gfid = self.gfids[key]
- if op_ret == 0:
- preparentstr = trace_stat2str(preparent)
- postparentstr = trace_stat2str(postparent)
- print(("GLUPY TRACE RMDIR CBK- {0:d}: gfid={1:s}; "+
- "op_ret={2:d}; *prebuf={3:s}; "+
- "*postbuf={4:s}").format(unique, gfid, op_ret,
- preparentstr,
- postparentstr))
- else:
- print(("GLUPY TRACE RMDIR CBK- {0:d}: gfid={1:s}; "+
- "op_ret={2:d}; op_errno={3:d}").format(unique,
- gfid,
- op_ret,
- op_errno))
- del self.gfids[key]
- dl.unwind_rmdir(frame, cookie, this, op_ret, op_errno,
- preparent, postparent, xdata)
- return 0
-
- def stat_fop(self, frame, this, loc, xdata):
- unique = dl.get_rootunique(frame)
- key = dl.get_id(frame)
- gfid = uuid2str(loc.contents.inode.contents.gfid)
- print(("GLUPY TRACE STAT FOP- {0:d}: gfid={1:s}; " +
- " path={2:s}").format(unique, gfid, loc.contents.path))
- self.gfids[key] = gfid
- dl.wind_stat(frame, POINTER(xlator_t)(), loc, xdata)
- return 0
-
- def stat_cbk(self, frame, cookie, this, op_ret, op_errno, buf,
- xdata):
- unique = dl.get_rootunique(frame)
- key = dl.get_id(frame)
- gfid = self.gfids[key]
- if op_ret == 0:
- statstr = trace_stat2str(buf)
- print(("GLUPY TRACE STAT CBK- {0:d}: gfid={1:s}; "+
- "op_ret={2:d}; *buf={3:s};").format(unique,
- gfid,
- op_ret,
- statstr))
- else:
- print(("GLUPY TRACE STAT CBK- {0:d}: gfid={1:s}; "+
- "op_ret={2:d}; op_errno={3:d}").format(unique,
- gfid,
- op_ret,
- op_errno))
- del self.gfids[key]
- dl.unwind_stat(frame, cookie, this, op_ret, op_errno,
- buf, xdata)
- return 0
-
- def fstat_fop(self, frame, this, fd, xdata):
- unique = dl.get_rootunique(frame)
- key = dl.get_id(frame)
- gfid = uuid2str(fd.contents.inode.contents.gfid)
- print(("GLUPY TRACE FSTAT FOP- {0:d}: gfid={1:s}; " +
- "fd={2:s}").format(unique, gfid, fd))
- self.gfids[key] = gfid
- dl.wind_fstat(frame, POINTER(xlator_t)(), fd, xdata)
- return 0
-
- def fstat_cbk(self, frame, cookie, this, op_ret, op_errno, buf,
- xdata):
- unique = dl.get_rootunique(frame)
- key = dl.get_id(frame)
- gfid = self.gfids[key]
- if op_ret == 0:
- statstr = trace_stat2str(buf)
- print(("GLUPY TRACE FSTAT CBK- {0:d}: gfid={1:s} "+
- " op_ret={2:d}; *buf={3:s}").format(unique,
- gfid,
- op_ret,
- statstr))
- else:
- print(("GLUPY TRACE FSTAT CBK- {0:d}: gfid={1:s} "+
- "op_ret={2:d}; op_errno={3:d}").format(unique.
- gfid,
- op_ret,
- op_errno))
- del self.gfids[key]
- dl.unwind_fstat(frame, cookie, this, op_ret, op_errno,
- buf, xdata)
- return 0
-
- def statfs_fop(self, frame, this, loc, xdata):
- unique = dl.get_rootunique(frame)
- if loc.contents.inode:
- gfid = uuid2str(loc.contents.inode.contents.gfid)
- else:
- gfid = "0"
- print(("GLUPY TRACE STATFS FOP- {0:d}: gfid={1:s}; "+
- "path={2:s}").format(unique, gfid, loc.contents.path))
- dl.wind_statfs(frame, POINTER(xlator_t)(), loc, xdata)
- return 0
-
- def statfs_cbk(self, frame, cookie, this, op_ret, op_errno, buf,
- xdata):
- unique = dl.get_rootunique(frame)
- if op_ret == 0:
- #TBD: print buf (pointer to an iovec type object)
- print(("GLUPY TRACE STATFS CBK {0:d}: "+
- "op_ret={1:d}").format(unique, op_ret))
- else:
- print(("GLUPY TRACE STATFS CBK- {0:d}"+
- "op_ret={1:d}; op_errno={2:d}").format(unique,
- op_ret,
- op_errno))
- dl.unwind_statfs(frame, cookie, this, op_ret, op_errno,
- buf, xdata)
- return 0
-
- def getxattr_fop(self, frame, this, loc, name, xdata):
- unique = dl.get_rootunique(frame)
- key = dl.get_id(frame)
- gfid = uuid2str(loc.contents.inode.contents.gfid)
- print(("GLUPY TRACE GETXATTR FOP- {0:d}: gfid={1:s}; path={2:s};"+
- " name={3:s}").format(unique, gfid, loc.contents.path,
- name))
- self.gfids[key]=gfid
- dl.wind_getxattr(frame, POINTER(xlator_t)(), loc, name, xdata)
- return 0
-
- def getxattr_cbk(self, frame, cookie, this, op_ret, op_errno,
- dictionary, xdata):
- unique = dl.get_rootunique(frame)
- key = dl.get_id(frame)
- gfid = self.gfids[key]
- print(("GLUPY TRACE GETXATTR CBK- {0:d}: gfid={1:s}; "+
- "op_ret={2:d}; op_errno={3:d}; "+
- " dictionary={4:s}").format(unique, gfid, op_ret, op_errno,
- dictionary))
- del self.gfids[key]
- dl.unwind_getxattr(frame, cookie, this, op_ret, op_errno,
- dictionary, xdata)
- return 0
-
- def fgetxattr_fop(self, frame, this, fd, name, xdata):
- unique = dl.get_rootunique(frame)
- key = dl.get_id(frame)
- gfid = uuid2str(fd.contents.inode.contents.gfid)
- print(("GLUPY TRACE FGETXATTR FOP- {0:d}: gfid={1:s}; fd={2:s}; "+
- "name={3:s}").format(unique, gfid, fd, name))
- self.gfids[key] = gfid
- dl.wind_fgetxattr(frame, POINTER(xlator_t)(), fd, name, xdata)
- return 0
-
- def fgetxattr_cbk(self, frame, cookie, this, op_ret, op_errno,
- dictionary, xdata):
- unique = dl.get_rootunique(frame)
- key = dl.get_id(frame)
- gfid = self.gfids[key]
- print(("GLUPY TRACE FGETXATTR CBK- {0:d}: gfid={1:s}; "+
- "op_ret={2:d}; op_errno={3:d};"+
- " dictionary={4:s}").format(unique, gfid, op_ret,
- op_errno, dictionary))
- del self.gfids[key]
- dl.unwind_fgetxattr(frame, cookie, this, op_ret, op_errno,
- dictionary, xdata)
- return 0
-
- def setxattr_fop(self, frame, this, loc, dictionary, flags, xdata):
- unique = dl.get_rootunique(frame)
- key = dl.get_id(frame)
- gfid = uuid2str(loc.contents.inode.contents.gfid)
- print(("GLUPY TRACE SETXATTR FOP- {0:d}: gfid={1:s}; path={2:s};"+
- " flags={3:d}").format(unique, gfid, loc.contents.path,
- flags))
- self.gfids[key] = gfid
- dl.wind_setxattr(frame, POINTER(xlator_t)(), loc, dictionary,
- flags, xdata)
- return 0
-
- def setxattr_cbk(self, frame, cookie, this, op_ret, op_errno, xdata):
- unique = dl.get_rootunique(frame)
- key = dl.get_id(frame)
- gfid = self.gfids[key]
- print(("GLUPY TRACE SETXATTR CBK- {0:d}: gfid={1:s}; "+
- "op_ret={2:d}; op_errno={3:d}").format(unique, gfid,
- op_ret, op_errno))
- del self.gfids[key]
- dl.unwind_setxattr(frame, cookie, this, op_ret, op_errno,
- xdata)
- return 0
-
- def fsetxattr_fop(self, frame, this, fd, dictionary, flags, xdata):
- unique = dl.get_rootunique(frame)
- key = dl.get_id(frame)
- gfid = uuid2str(fd.contents.inode.contents.gfid)
- print(("GLUPY TRACE FSETXATTR FOP- {0:d}: gfid={1:s}; fd={2:p}; "+
- "flags={3:d}").format(unique, gfid, fd, flags))
- self.gfids[key] = gfid
- dl.wind_fsetxattr(frame, POINTER(xlator_t)(), fd, dictionary,
- flags, xdata)
- return 0
-
- def fsetxattr_cbk(self, frame, cookie, this, op_ret, op_errno, xdata):
- unique = dl.get_rootunique(frame)
- key = dl.get_id(frame)
- gfid = self.gfids[key]
- print(("GLUPY TRACE FSETXATTR CBK- {0:d}: gfid={1:s}; "+
- "op_ret={2:d}; op_errno={3:d}").format(unique, gfid,
- op_ret, op_errno))
- del self.gfids[key]
- dl.unwind_fsetxattr(frame, cookie, this, op_ret, op_errno,
- xdata)
- return 0
-
- def removexattr_fop(self, frame, this, loc, name, xdata):
- unique = dl.get_rootunique(frame)
- key = dl.get_id(frame)
- gfid = uuid2str(loc.contents.inode.contents.gfid)
- print(("GLUPY TRACE REMOVEXATTR FOP- {0:d}: gfid={1:s}; "+
- "path={2:s}; name={3:s}").format(unique, gfid,
- loc.contents.path,
- name))
- self.gfids[key] = gfid
- dl.wind_removexattr(frame, POINTER(xlator_t)(), loc, name,
- xdata)
- return 0
-
- def removexattr_cbk(self, frame, cookie, this, op_ret, op_errno,
- xdata):
- unique = dl.get_rootunique(frame)
- key = dl.get_id(frame)
- gfid = self.gfids[key]
- print(("GLUPY TRACE REMOVEXATTR CBK- {0:d}: gfid={1:s} "+
- " op_ret={2:d}; op_errno={3:d}").format(unique, gfid,
- op_ret, op_errno))
- del self.gfids[key]
- dl.unwind_removexattr(frame, cookie, this, op_ret, op_errno,
- xdata)
- return 0
-
- def link_fop(self, frame, this, oldloc, newloc, xdata):
- unique = dl.get_rootunique(frame)
- key = dl.get_id(frame)
- if (newloc.contents.inode):
- newgfid = uuid2str(newloc.contents.inode.contents.gfid)
- else:
- newgfid = "0"
- oldgfid = uuid2str(oldloc.contents.inode.contents.gfid)
- print(("GLUPY TRACE LINK FOP-{0:d}: oldgfid={1:s}; oldpath={2:s};"+
- "newgfid={3:s};"+
- "newpath={4:s}").format(unique, oldgfid,
- oldloc.contents.path,
- newgfid,
- newloc.contents.path))
- self.gfids[key] = oldgfid
- dl.wind_link(frame, POINTER(xlator_t)(), oldloc, newloc,
- xdata)
- return 0
-
- def link_cbk(self, frame, cookie, this, op_ret, op_errno, inode, buf,
- preparent, postparent, xdata):
- unique = dl.get_rootunique(frame)
- key = dl.get_id(frame)
- gfid = self.gfids[key]
- if op_ret == 0:
- statstr = trace_stat2str(buf)
- preparentstr = trace_stat2str(preparent)
- postparentstr = trace_stat2str(postparent)
- print(("GLUPY TRACE LINK CBK- {0:d}: op_ret={1:d} "+
- "*stbuf={2:s}; *prebuf={3:s}; "+
- "*postbuf={4:s} ").format(unique, op_ret, statstr,
- preparentstr,
- postparentstr))
- else:
- print(("GLUPY TRACE LINK CBK- {0:d}: gfid={1:s}; "+
- "op_ret={2:d}; "+
- "op_errno={3:d}").format(unique, gfid,
- op_ret, op_errno))
- del self.gfids[key]
- dl.unwind_link(frame, cookie, this, op_ret, op_errno, inode,
- buf, preparent, postparent, xdata)
- return 0
-
- def unlink_fop(self, frame, this, loc, xflag, xdata):
- unique = dl.get_rootunique(frame)
- key = dl.get_id(frame)
- gfid = uuid2str(loc.contents.inode.contents.gfid)
- print(("GLUPY TRACE UNLINK FOP- {0:d}; gfid={1:s}; path={2:s}; "+
- "flag={3:d}").format(unique, gfid, loc.contents.path,
- xflag))
- self.gfids[key] = gfid
- dl.wind_unlink(frame, POINTER(xlator_t)(), loc, xflag,
- xdata)
- return 0
-
- def unlink_cbk(self, frame, cookie, this, op_ret, op_errno,
- preparent, postparent, xdata):
- unique = dl.get_rootunique(frame)
- key = dl.get_id(frame)
- gfid = self.gfids[key]
- if op_ret == 0:
- preparentstr = trace_stat2str(preparent)
- postparentstr = trace_stat2str(postparent)
- print(("GLUPY TRACE UNLINK CBK- {0:d}: gfid ={1:s}; "+
- "op_ret={2:d}; *prebuf={3:s}; "+
- "*postbuf={4:s} ").format(unique, gfid, op_ret,
- preparentstr,
- postparentstr))
- else:
- print(("GLUPY TRACE UNLINK CBK: {0:d}: gfid ={1:s}; "+
- "op_ret={2:d}; "+
- "op_errno={3:d}").format(unique, gfid, op_ret,
- op_errno))
- del self.gfids[key]
- dl.unwind_unlink(frame, cookie, this, op_ret, op_errno,
- preparent, postparent, xdata)
- return 0
-
- def readlink_fop(self, frame, this, loc, size, xdata):
- unique = dl.get_rootunique(frame)
- key = dl.get_id(frame)
- gfid = uuid2str(loc.contents.inode.contents.gfid)
- print(("GLUPY TRACE READLINK FOP- {0:d}: gfid={1:s}; path={2:s};"+
- " size={3:d}").format(unique, gfid, loc.contents.path,
- size))
- self.gfids[key] = gfid
- dl.wind_readlink(frame, POINTER(xlator_t)(), loc, size,
- xdata)
- return 0
-
- def readlink_cbk(self, frame, cookie, this, op_ret, op_errno,
- buf, stbuf, xdata):
- unique = dl.get_rootunique(frame)
- key = dl.get_id(frame)
- gfid = self.gfids[key]
- if op_ret == 0:
- statstr = trace_stat2str(stbuf)
- print(("GLUPY TRACE READLINK CBK- {0:d}: gfid={1:s} "+
- " op_ret={2:d}; op_errno={3:d}; *prebuf={4:s}; "+
- "*postbuf={5:s} ").format(unique, gfid,
- op_ret, op_errno,
- buf, statstr))
- else:
- print(("GLUPY TRACE READLINK CBK- {0:d}: gfid={1:s} "+
- " op_ret={2:d}; op_errno={3:d}").format(unique,
- gfid,
- op_ret,
- op_errno))
- del self.gfids[key]
- dl.unwind_readlink(frame, cookie, this, op_ret, op_errno, buf,
- stbuf, xdata)
- return 0
-
- def symlink_fop(self, frame, this, linkpath, loc, umask, xdata):
- unique = dl.get_rootunique(frame)
- key = dl.get_id(frame)
- gfid = uuid2str(loc.contents.inode.contents.gfid)
- print(("GLUPY TRACE SYMLINK FOP- {0:d}: gfid={1:s}; "+
- "linkpath={2:s}; path={3:s};"+
- "umask=0{4:o}").format(unique, gfid, linkpath,
- loc.contents.path, umask))
- self.gfids[key] = gfid
- dl.wind_symlink(frame, POINTER(xlator_t)(), linkpath, loc,
- umask, xdata)
- return 0
-
- def symlink_cbk(self, frame, cookie, this, op_ret, op_errno,
- inode, buf, preparent, postparent, xdata):
- unique = dl.get_rootunique(frame)
- key = dl.get_id(frame)
- gfid = self.gfids[key]
- if op_ret == 0:
- statstr = trace_stat2str(buf)
- preparentstr = trace_stat2str(preparent)
- postparentstr = trace_stat2str(postparent)
- print(("GLUPY TRACE SYMLINK CBK- {0:d}: gfid={1:s}; "+
- "op_ret={2:d}; *stbuf={3:s}; *preparent={4:s}; "+
- "*postparent={5:s}").format(unique, gfid,
- op_ret, statstr,
- preparentstr,
- postparentstr))
- else:
- print(("GLUPY TRACE SYMLINK CBK- {0:d}: gfid={1:s}; "+
- "op_ret={2:d}; op_errno={3:d}").format(unique,
- gfid,
- op_ret,
- op_errno))
- del self.gfids[key]
- dl.unwind_symlink(frame, cookie, this, op_ret, op_errno,
- inode, buf, preparent, postparent, xdata)
- return 0
diff --git a/xlators/features/glupy/examples/helloworld.py b/xlators/features/glupy/examples/helloworld.py
deleted file mode 100644
index 282f9207949..00000000000
--- a/xlators/features/glupy/examples/helloworld.py
+++ /dev/null
@@ -1,21 +0,0 @@
-
-from __future__ import print_function
-import sys
-from gluster.glupy import *
-
-class xlator (Translator):
-
- def __init__(self, c_this):
- Translator.__init__(self, c_this)
-
- def lookup_fop(self, frame, this, loc, xdata):
- print("Python xlator: Hello!")
- dl.wind_lookup(frame, POINTER(xlator_t)(), loc, xdata)
- return 0
-
- def lookup_cbk(self, frame, cookie, this, op_ret, op_errno, inode, buf,
- xdata, postparent):
- print("Python xlator: Hello again!")
- dl.unwind_lookup(frame, cookie, this, op_ret, op_errno, inode, buf,
- xdata, postparent)
- return 0
diff --git a/xlators/features/glupy/examples/negative.py b/xlators/features/glupy/examples/negative.py
deleted file mode 100644
index e04b16aa553..00000000000
--- a/xlators/features/glupy/examples/negative.py
+++ /dev/null
@@ -1,93 +0,0 @@
-
-from __future__ import print_function
-import sys
-from uuid import UUID
-from gluster.glupy import *
-
-# Negative-lookup-caching example. If a file wasn't there the last time we
-# looked, it's probably still not there. This translator keeps track of
-# those failed lookups for us, and returns ENOENT without needing to pass the
-# call any further for repeated requests.
-
-# If we were doing this for real, we'd need separate caches for each xlator
-# instance. The easiest way to do this would be to have xlator.__init__
-# "register" each instance in a module-global dict, with the key as the C
-# translator address and the value as the xlator object itself. For testing
-# and teaching, it's sufficient just to have one cache. The keys are parent
-# GFIDs, and the entries are lists of names within that parent that we know
-# don't exist.
-cache = {}
-
-# TBD: we need a better way of handling per-request data (frame->local in C).
-dl.get_id.restype = c_long
-dl.get_id.argtypes = [ POINTER(call_frame_t) ]
-
-def uuid2str (gfid):
- return str(UUID(''.join(map("{0:02x}".format, gfid))))
-
-class xlator (Translator):
-
- def __init__ (self, c_this):
- self.requests = {}
- Translator.__init__(self, c_this)
-
- def lookup_fop (self, frame, this, loc, xdata):
- pargfid = uuid2str(loc.contents.pargfid)
- print("lookup FOP: %s:%s" % (pargfid, loc.contents.name))
- # Check the cache.
- if pargfid in cache:
- if loc.contents.name in cache[pargfid]:
- print("short-circuiting for %s:%s" % (pargfid,
- loc.contents.name))
- dl.unwind_lookup(frame, 0, this, -1, 2, None, None, None, None)
- return 0
- key = dl.get_id(frame)
- self.requests[key] = (pargfid, loc.contents.name[:])
- # TBD: get real child xl from init, pass it here
- dl.wind_lookup(frame, POINTER(xlator_t)(), loc, xdata)
- return 0
-
- def lookup_cbk (self, frame, cookie, this, op_ret, op_errno, inode, buf,
- xdata, postparent):
- print("lookup CBK: %d (%d)" % (op_ret, op_errno))
- key = dl.get_id(frame)
- pargfid, name = self.requests[key]
- # Update the cache.
- if op_ret == 0:
- print("found %s, removing from cache" % name)
- if pargfid in cache:
- cache[pargfid].discard(name)
- elif op_errno == 2: # ENOENT
- print("failed to find %s, adding to cache" % name)
- if pargfid in cache:
- cache[pargfid].add(name)
- else:
- cache[pargfid] = {name}
- del self.requests[key]
- dl.unwind_lookup(frame, cookie, this, op_ret, op_errno,
- inode, buf, xdata, postparent)
- return 0
-
- def create_fop (self, frame, this, loc, flags, mode, umask, fd, xdata):
- pargfid = uuid2str(loc.contents.pargfid)
- print("create FOP: %s:%s" % (pargfid, loc.contents.name))
- key = dl.get_id(frame)
- self.requests[key] = (pargfid, loc.contents.name[:])
- # TBD: get real child xl from init, pass it here
- dl.wind_create(frame, POINTER(xlator_t)(), loc, flags, mode, umask, fd, xdata)
- return 0
-
- def create_cbk (self, frame, cookie, this, op_ret, op_errno, fd, inode,
- buf, preparent, postparent, xdata):
- print("create CBK: %d (%d)" % (op_ret, op_errno))
- key = dl.get_id(frame)
- pargfid, name = self.requests[key]
- # Update the cache.
- if op_ret == 0:
- print("created %s, removing from cache" % name)
- if pargfid in cache:
- cache[pargfid].discard(name)
- del self.requests[key]
- dl.unwind_create(frame, cookie, this, op_ret, op_errno, fd, inode, buf,
- preparent, postparent, xdata)
- return 0
diff --git a/xlators/features/glupy/src/Makefile.am b/xlators/features/glupy/src/Makefile.am
deleted file mode 100644
index 817b0d00f61..00000000000
--- a/xlators/features/glupy/src/Makefile.am
+++ /dev/null
@@ -1,36 +0,0 @@
-xlator_LTLIBRARIES = glupy.la
-
-# Ensure GLUSTER_PYTHON_PATH is passed to glupy.so
-xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features
-glupydir = $(xlatordir)/glupy
-
-AM_CPPFLAGS = $(GF_CPPFLAGS) \
- -I$(top_srcdir)/libglusterfs/src \
- -I$(top_srcdir)/rpc/xdr/src -I$(top_builddir)/rpc/xdr/src
-
-AM_CFLAGS = -Wall -fno-strict-aliasing \
- -DGLUSTER_PYTHON_PATH=\"$(glupydir)\" \
- -DPATH_GLUSTERFS_GLUPY_MODULE=\"${xlatordir}/glupy${shrext_cmds}\" \
- $(GF_CFLAGS) $(PYTHON_CFLAGS)
-
-# Flags to build glupy.so with
-glupy_la_LDFLAGS = -module -nostartfiles \
- -export-symbols $(top_srcdir)/xlators/features/glupy/src/glupy.sym \
- $(GF_XLATOR_LDFLAGS) $(PYTHON_LIBS)
-
-glupy_la_SOURCES = glupy.c
-glupy_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la \
- -lpthread $(LIB_DL)
-
-noinst_HEADERS = glupy.h
-
-# Install __init__.py into the Python site-packages area
-pyglupydir = @BUILD_PYTHON_SITE_PACKAGES@/gluster
-pyglupy_PYTHON = __init__.py
-
-# Install glupy/__init_-.py into the Python site-packages area
-SUBDIRS = glupy
-
-CLEANFILES =
-
-EXTRA_DIST = glupy.sym
diff --git a/xlators/features/glupy/src/__init__.py.in b/xlators/features/glupy/src/__init__.py.in
deleted file mode 100644
index 3ad9513f40e..00000000000
--- a/xlators/features/glupy/src/__init__.py.in
+++ /dev/null
@@ -1,2 +0,0 @@
-from pkgutil import extend_path
-__path__ = extend_path(__path__, __name__)
diff --git a/xlators/features/glupy/src/glupy.c b/xlators/features/glupy/src/glupy.c
deleted file mode 100644
index d1a111eab4a..00000000000
--- a/xlators/features/glupy/src/glupy.c
+++ /dev/null
@@ -1,2446 +0,0 @@
-/*
- Copyright (c) 2006-2014 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-#include <ctype.h>
-#include <dlfcn.h>
-#include <sys/uio.h>
-#include <Python.h>
-
-#include "glusterfs.h"
-#include "xlator.h"
-#include "logging.h"
-#include "defaults.h"
-
-#include "glupy.h"
-
-/* UTILITY FUNCTIONS FOR FOP-SPECIFIC CODE */
-
-pthread_key_t gil_init_key;
-
-PyGILState_STATE
-glupy_enter(void)
-{
- if (!pthread_getspecific(gil_init_key)) {
- PyEval_ReleaseLock();
- (void)pthread_setspecific(gil_init_key, (void *)1);
- }
-
- return PyGILState_Ensure();
-}
-
-void
-glupy_leave(PyGILState_STATE gstate)
-{
- PyGILState_Release(gstate);
-}
-
-/* FOP: LOOKUP */
-
-int32_t
-glupy_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct iatt *buf, dict_t *xdata, struct iatt *postparent)
-{
- glupy_private_t *priv = this->private;
- PyGILState_STATE gstate;
- int32_t ret;
-
- if (!priv->cbks[GLUPY_LOOKUP]) {
- goto unwind;
- }
-
- gstate = glupy_enter();
- ret = ((fop_lookup_cbk_t)(priv->cbks[GLUPY_LOOKUP]))(
- frame, cookie, this, op_ret, op_errno, inode, buf, xdata, postparent);
- glupy_leave(gstate);
-
- return ret;
-
-unwind:
- frame->local = NULL;
- STACK_UNWIND_STRICT(lookup, frame, op_ret, op_errno, inode, buf, xdata,
- postparent);
- return 0;
-}
-
-int32_t
-glupy_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
-{
- glupy_private_t *priv = this->private;
- PyGILState_STATE gstate;
- int32_t ret;
- static long next_id = 0;
-
- if (!priv->fops[GLUPY_LOOKUP]) {
- goto wind;
- }
-
- gstate = glupy_enter();
- frame->local = (void *)++next_id;
- ret = ((fop_lookup_t)(priv->fops[GLUPY_LOOKUP]))(frame, this, loc, xdata);
- glupy_leave(gstate);
-
- return ret;
-
-wind:
- STACK_WIND(frame, glupy_lookup_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->lookup, loc, xdata);
- return 0;
-}
-
-void
-wind_lookup(call_frame_t *frame, xlator_t *xl, loc_t *loc, dict_t *xdata)
-{
- xlator_t *this = THIS;
-
- if (!xl || (xl == this)) {
- xl = FIRST_CHILD(this);
- }
-
- STACK_WIND(frame, glupy_lookup_cbk, xl, xl->fops->lookup, loc, xdata);
-}
-
-void
-unwind_lookup(call_frame_t *frame, long cookie, xlator_t *this, int32_t op_ret,
- int32_t op_errno, inode_t *inode, struct iatt *buf, dict_t *xdata,
- struct iatt *postparent)
-{
- frame->local = NULL;
- STACK_UNWIND_STRICT(lookup, frame, op_ret, op_errno, inode, buf, xdata,
- postparent);
-}
-
-void
-set_lookup_fop(long py_this, fop_lookup_t fop)
-{
- glupy_private_t *priv = ((xlator_t *)py_this)->private;
-
- priv->fops[GLUPY_LOOKUP] = (long)fop;
-}
-
-void
-set_lookup_cbk(long py_this, fop_lookup_cbk_t cbk)
-{
- glupy_private_t *priv = ((xlator_t *)py_this)->private;
-
- priv->cbks[GLUPY_LOOKUP] = (long)cbk;
-}
-
-/* FOP: CREATE */
-
-int32_t
-glupy_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, fd_t *fd, inode_t *inode,
- struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
-{
- glupy_private_t *priv = this->private;
- PyGILState_STATE gstate;
- int32_t ret;
-
- if (!priv->cbks[GLUPY_CREATE]) {
- goto unwind;
- }
-
- gstate = glupy_enter();
- ret = ((fop_create_cbk_t)(priv->cbks[GLUPY_CREATE]))(
- frame, cookie, this, op_ret, op_errno, fd, inode, buf, preparent,
- postparent, xdata);
- glupy_leave(gstate);
-
- return ret;
-
-unwind:
- frame->local = NULL;
- STACK_UNWIND_STRICT(create, frame, op_ret, op_errno, fd, inode, buf,
- preparent, postparent, xdata);
- return 0;
-}
-
-int32_t
-glupy_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
- mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata)
-{
- glupy_private_t *priv = this->private;
- PyGILState_STATE gstate;
- int32_t ret;
- static long next_id = 0;
-
- if (!priv->fops[GLUPY_CREATE]) {
- goto wind;
- }
-
- gstate = glupy_enter();
- frame->local = (void *)++next_id;
- ret = ((fop_create_t)(priv->fops[GLUPY_CREATE]))(frame, this, loc, flags,
- mode, umask, fd, xdata);
- glupy_leave(gstate);
-
- return ret;
-
-wind:
- STACK_WIND(frame, glupy_create_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->create, loc, flags, mode, umask, fd,
- xdata);
- return 0;
-}
-
-void
-wind_create(call_frame_t *frame, xlator_t *xl, loc_t *loc, int32_t flags,
- mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata)
-{
- xlator_t *this = THIS;
-
- if (!xl || (xl == this)) {
- xl = FIRST_CHILD(this);
- }
-
- STACK_WIND(frame, glupy_create_cbk, xl, xl->fops->create, loc, flags, mode,
- umask, fd, xdata);
-}
-
-void
-unwind_create(call_frame_t *frame, long cookie, xlator_t *this, int32_t op_ret,
- int32_t op_errno, fd_t *fd, inode_t *inode, struct iatt *buf,
- struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
-{
- frame->local = NULL;
- STACK_UNWIND_STRICT(create, frame, op_ret, op_errno, fd, inode, buf,
- preparent, postparent, xdata);
-}
-
-void
-set_create_fop(long py_this, fop_create_t fop)
-{
- glupy_private_t *priv = ((xlator_t *)py_this)->private;
-
- priv->fops[GLUPY_CREATE] = (long)fop;
-}
-
-void
-set_create_cbk(long py_this, fop_create_cbk_t cbk)
-{
- glupy_private_t *priv = ((xlator_t *)py_this)->private;
-
- priv->cbks[GLUPY_CREATE] = (long)cbk;
-}
-
-/* FOP: OPEN */
-
-int32_t
-glupy_open_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata)
-{
- glupy_private_t *priv = this->private;
- PyGILState_STATE gstate;
- int32_t ret;
-
- if (!priv->cbks[GLUPY_OPEN]) {
- goto unwind;
- }
-
- gstate = glupy_enter();
- ret = ((fop_open_cbk_t)(priv->cbks[GLUPY_OPEN]))(
- frame, cookie, this, op_ret, op_errno, fd, xdata);
- glupy_leave(gstate);
-
- return ret;
-
-unwind:
- frame->local = NULL;
- STACK_UNWIND_STRICT(open, frame, op_ret, op_errno, fd, xdata);
- return 0;
-}
-
-int32_t
-glupy_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
- fd_t *fd, dict_t *xdata)
-{
- glupy_private_t *priv = this->private;
- PyGILState_STATE gstate;
- int32_t ret;
- static long next_id = 0;
-
- if (!priv->fops[GLUPY_OPEN]) {
- goto wind;
- }
-
- gstate = glupy_enter();
- frame->local = (void *)++next_id;
- ret = ((fop_open_t)(priv->fops[GLUPY_OPEN]))(frame, this, loc, flags, fd,
- xdata);
- glupy_leave(gstate);
-
- return ret;
-
-wind:
- STACK_WIND(frame, glupy_open_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->open, loc, flags, fd, xdata);
- return 0;
-}
-
-void
-wind_open(call_frame_t *frame, xlator_t *xl, loc_t *loc, int32_t flags,
- fd_t *fd, dict_t *xdata)
-{
- xlator_t *this = THIS;
-
- if (!xl || (xl == this)) {
- xl = FIRST_CHILD(this);
- }
-
- STACK_WIND(frame, glupy_open_cbk, xl, xl->fops->open, loc, flags, fd,
- xdata);
-}
-
-void
-unwind_open(call_frame_t *frame, long cookie, xlator_t *this, int32_t op_ret,
- int32_t op_errno, fd_t *fd, dict_t *xdata)
-{
- frame->local = NULL;
- STACK_UNWIND_STRICT(open, frame, op_ret, op_errno, fd, xdata);
-}
-
-void
-set_open_fop(long py_this, fop_open_t fop)
-{
- glupy_private_t *priv = ((xlator_t *)py_this)->private;
- priv->fops[GLUPY_OPEN] = (long)fop;
-}
-
-void
-set_open_cbk(long py_this, fop_open_cbk_t cbk)
-{
- glupy_private_t *priv = ((xlator_t *)py_this)->private;
- priv->cbks[GLUPY_OPEN] = (long)cbk;
-}
-
-/* FOP: READV */
-
-int32_t
-glupy_readv_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iovec *vector,
- int32_t count, struct iatt *stbuf, struct iobref *iobref,
- dict_t *xdata)
-{
- glupy_private_t *priv = this->private;
- PyGILState_STATE gstate;
- int32_t ret;
-
- if (!priv->cbks[GLUPY_READV]) {
- goto unwind;
- }
-
- gstate = glupy_enter();
- ret = ((fop_readv_cbk_t)(priv->cbks[GLUPY_READV]))(
- frame, cookie, this, op_ret, op_errno, vector, count, stbuf, iobref,
- xdata);
- glupy_leave(gstate);
-
- return ret;
-
-unwind:
- frame->local = NULL;
- STACK_UNWIND_STRICT(readv, frame, op_ret, op_errno, vector, count, stbuf,
- iobref, xdata);
- return 0;
-}
-
-int32_t
-glupy_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
- off_t offset, uint32_t flags, dict_t *xdata)
-{
- glupy_private_t *priv = this->private;
- PyGILState_STATE gstate;
- int32_t ret;
- static long next_id = 0;
-
- if (!priv->fops[GLUPY_READV]) {
- goto wind;
- }
-
- gstate = glupy_enter();
- frame->local = (void *)++next_id;
- ret = ((fop_readv_t)(priv->fops[GLUPY_READV]))(frame, this, fd, size,
- offset, flags, xdata);
- glupy_leave(gstate);
-
- return ret;
-
-wind:
- STACK_WIND(frame, glupy_readv_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->readv, fd, size, offset, flags, xdata);
- return 0;
-}
-
-void
-wind_readv(call_frame_t *frame, xlator_t *xl, fd_t *fd, size_t size,
- off_t offset, uint32_t flags, dict_t *xdata)
-{
- xlator_t *this = THIS;
-
- if (!xl || (xl == this)) {
- xl = FIRST_CHILD(this);
- }
-
- STACK_WIND(frame, glupy_readv_cbk, xl, xl->fops->readv, fd, size, offset,
- flags, xdata);
-}
-
-void
-unwind_readv(call_frame_t *frame, long cookie, xlator_t *this, int32_t op_ret,
- int32_t op_errno, struct iovec *vector, int32_t count,
- struct iatt *stbuf, struct iobref *iobref, dict_t *xdata)
-{
- frame->local = NULL;
- STACK_UNWIND_STRICT(readv, frame, op_ret, op_errno, vector, count, stbuf,
- iobref, xdata);
-}
-
-void
-set_readv_fop(long py_this, fop_readv_t fop)
-{
- glupy_private_t *priv = ((xlator_t *)py_this)->private;
- priv->fops[GLUPY_READV] = (long)fop;
-}
-
-void
-set_readv_cbk(long py_this, fop_readv_cbk_t cbk)
-{
- glupy_private_t *priv = ((xlator_t *)py_this)->private;
- priv->cbks[GLUPY_READV] = (long)cbk;
-}
-
-/* FOP: WRITEV */
-
-int32_t
-glupy_writev_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf, dict_t *xdata)
-{
- glupy_private_t *priv = this->private;
- PyGILState_STATE gstate;
- int32_t ret;
-
- if (!priv->cbks[GLUPY_WRITEV]) {
- goto unwind;
- }
-
- gstate = glupy_enter();
- ret = ((fop_writev_cbk_t)(priv->cbks[GLUPY_WRITEV]))(
- frame, cookie, this, op_ret, op_errno, prebuf, postbuf, xdata);
- glupy_leave(gstate);
-
- return ret;
-
-unwind:
- frame->local = NULL;
- STACK_UNWIND_STRICT(writev, frame, op_ret, op_errno, prebuf, postbuf,
- xdata);
- return 0;
-}
-
-int32_t
-glupy_writev(call_frame_t *frame, xlator_t *this, fd_t *fd,
- struct iovec *vector, int32_t count, off_t offset, uint32_t flags,
- struct iobref *iobref, dict_t *xdata)
-{
- glupy_private_t *priv = this->private;
- PyGILState_STATE gstate;
- int32_t ret;
- static long next_id = 0;
-
- if (!priv->fops[GLUPY_WRITEV]) {
- goto wind;
- }
-
- gstate = glupy_enter();
- frame->local = (void *)++next_id;
- ret = ((fop_writev_t)(priv->fops[GLUPY_WRITEV]))(
- frame, this, fd, vector, count, offset, flags, iobref, xdata);
- glupy_leave(gstate);
-
- return ret;
-
-wind:
- STACK_WIND(frame, glupy_writev_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->writev, fd, vector, count, offset,
- flags, iobref, xdata);
- return 0;
-}
-
-void
-wind_writev(call_frame_t *frame, xlator_t *xl, fd_t *fd, struct iovec *vector,
- int32_t count, off_t offset, uint32_t flags, struct iobref *iobref,
- dict_t *xdata)
-{
- xlator_t *this = THIS;
-
- if (!xl || (xl == this)) {
- xl = FIRST_CHILD(this);
- }
-
- STACK_WIND(frame, glupy_writev_cbk, xl, xl->fops->writev, fd, vector, count,
- offset, flags, iobref, xdata);
-}
-
-void
-unwind_writev(call_frame_t *frame, long cookie, xlator_t *this, int32_t op_ret,
- int32_t op_errno, struct iatt *prebuf, struct iatt *postbuf,
- dict_t *xdata)
-{
- frame->local = NULL;
- STACK_UNWIND_STRICT(writev, frame, op_ret, op_errno, prebuf, postbuf,
- xdata);
-}
-
-void
-set_writev_fop(long py_this, fop_writev_t fop)
-{
- glupy_private_t *priv = ((xlator_t *)py_this)->private;
- priv->fops[GLUPY_WRITEV] = (long)fop;
-}
-
-void
-set_writev_cbk(long py_this, fop_writev_cbk_t cbk)
-{
- glupy_private_t *priv = ((xlator_t *)py_this)->private;
- priv->cbks[GLUPY_WRITEV] = (long)cbk;
-}
-
-/* FOP: OPENDIR */
-
-int32_t
-glupy_opendir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata)
-{
- glupy_private_t *priv = this->private;
- PyGILState_STATE gstate;
- int32_t ret;
-
- if (!priv->cbks[GLUPY_OPENDIR]) {
- goto unwind;
- }
-
- gstate = glupy_enter();
- ret = ((fop_opendir_cbk_t)(priv->cbks[GLUPY_OPENDIR]))(
- frame, cookie, this, op_ret, op_errno, fd, xdata);
- glupy_leave(gstate);
-
- return ret;
-
-unwind:
- frame->local = NULL;
- STACK_UNWIND_STRICT(opendir, frame, op_ret, op_errno, fd, xdata);
- return 0;
-}
-
-int32_t
-glupy_opendir(call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd,
- dict_t *xdata)
-{
- glupy_private_t *priv = this->private;
- PyGILState_STATE gstate;
- int32_t ret;
- static long next_id = 0;
-
- if (!priv->fops[GLUPY_OPENDIR]) {
- goto wind;
- }
-
- gstate = glupy_enter();
- frame->local = (void *)++next_id;
- ret = ((fop_opendir_t)(priv->fops[GLUPY_OPENDIR]))(frame, this, loc, fd,
- xdata);
- glupy_leave(gstate);
-
- return ret;
-
-wind:
- STACK_WIND(frame, glupy_opendir_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->opendir, loc, fd, xdata);
- return 0;
-}
-
-void
-wind_opendir(call_frame_t *frame, xlator_t *xl, loc_t *loc, fd_t *fd,
- dict_t *xdata)
-{
- xlator_t *this = THIS;
-
- if (!xl || (xl == this)) {
- xl = FIRST_CHILD(this);
- }
-
- STACK_WIND(frame, glupy_opendir_cbk, xl, xl->fops->opendir, loc, fd, xdata);
-}
-
-void
-unwind_opendir(call_frame_t *frame, long cookie, xlator_t *this, int32_t op_ret,
- int32_t op_errno, fd_t *fd, dict_t *xdata)
-{
- frame->local = NULL;
- STACK_UNWIND_STRICT(opendir, frame, op_ret, op_errno, fd, xdata);
-}
-
-void
-set_opendir_fop(long py_this, fop_opendir_t fop)
-{
- glupy_private_t *priv = ((xlator_t *)py_this)->private;
-
- priv->fops[GLUPY_OPENDIR] = (long)fop;
-}
-
-void
-set_opendir_cbk(long py_this, fop_opendir_cbk_t cbk)
-{
- glupy_private_t *priv = ((xlator_t *)py_this)->private;
-
- priv->cbks[GLUPY_OPENDIR] = (long)cbk;
-}
-
-/* FOP: READDIR */
-
-int32_t
-glupy_readdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, gf_dirent_t *entries,
- dict_t *xdata)
-{
- glupy_private_t *priv = this->private;
- PyGILState_STATE gstate;
- int32_t ret;
-
- if (!priv->cbks[GLUPY_READDIR]) {
- goto unwind;
- }
-
- gstate = glupy_enter();
- ret = ((fop_readdir_cbk_t)(priv->cbks[GLUPY_READDIR]))(
- frame, cookie, this, op_ret, op_errno, entries, xdata);
- glupy_leave(gstate);
-
- return ret;
-
-unwind:
- frame->local = NULL;
- STACK_UNWIND_STRICT(readdir, frame, op_ret, op_errno, entries, xdata);
- return 0;
-}
-
-int32_t
-glupy_readdir(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
- off_t offset, dict_t *xdata)
-{
- glupy_private_t *priv = this->private;
- PyGILState_STATE gstate;
- int32_t ret;
- static long next_id = 0;
-
- if (!priv->fops[GLUPY_READDIR]) {
- goto wind;
- }
-
- gstate = glupy_enter();
- frame->local = (void *)++next_id;
- ret = ((fop_readdir_t)(priv->fops[GLUPY_READDIR]))(frame, this, fd, size,
- offset, xdata);
- glupy_leave(gstate);
-
- return ret;
-
-wind:
- STACK_WIND(frame, glupy_readdir_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->readdir, fd, size, offset, xdata);
- return 0;
-}
-
-void
-wind_readdir(call_frame_t *frame, xlator_t *xl, fd_t *fd, size_t size,
- off_t offset, dict_t *xdata)
-{
- xlator_t *this = THIS;
-
- if (!xl || (xl == this)) {
- xl = FIRST_CHILD(this);
- }
-
- STACK_WIND(frame, glupy_readdir_cbk, xl, xl->fops->readdir, fd, size,
- offset, xdata);
-}
-
-void
-unwind_readdir(call_frame_t *frame, long cookie, xlator_t *this, int32_t op_ret,
- int32_t op_errno, gf_dirent_t *entries, dict_t *xdata)
-{
- frame->local = NULL;
- STACK_UNWIND_STRICT(readdir, frame, op_ret, op_errno, entries, xdata);
-}
-
-void
-set_readdir_fop(long py_this, fop_readdir_t fop)
-{
- glupy_private_t *priv = ((xlator_t *)py_this)->private;
-
- priv->fops[GLUPY_READDIR] = (long)fop;
-}
-
-void
-set_readdir_cbk(long py_this, fop_readdir_cbk_t cbk)
-{
- glupy_private_t *priv = ((xlator_t *)py_this)->private;
-
- priv->cbks[GLUPY_READDIR] = (long)cbk;
-}
-
-/* FOP: READDIRP */
-
-int32_t
-glupy_readdirp_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, gf_dirent_t *entries,
- dict_t *xdata)
-{
- glupy_private_t *priv = this->private;
- PyGILState_STATE gstate;
- int32_t ret;
-
- if (!priv->cbks[GLUPY_READDIRP]) {
- goto unwind;
- }
-
- gstate = glupy_enter();
- ret = ((fop_readdirp_cbk_t)(priv->cbks[GLUPY_READDIRP]))(
- frame, cookie, this, op_ret, op_errno, entries, xdata);
- glupy_leave(gstate);
-
- return ret;
-
-unwind:
- frame->local = NULL;
- STACK_UNWIND_STRICT(readdirp, frame, op_ret, op_errno, entries, xdata);
- return 0;
-}
-
-int32_t
-glupy_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
- off_t offset, dict_t *xdata)
-{
- glupy_private_t *priv = this->private;
- PyGILState_STATE gstate;
- int32_t ret;
- static long next_id = 0;
-
- if (!priv->fops[GLUPY_READDIRP]) {
- goto wind;
- }
-
- gstate = glupy_enter();
- frame->local = (void *)++next_id;
- ret = ((fop_readdirp_t)(priv->fops[GLUPY_READDIRP]))(frame, this, fd, size,
- offset, xdata);
- glupy_leave(gstate);
-
- return ret;
-
-wind:
- STACK_WIND(frame, glupy_readdirp_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->readdirp, fd, size, offset, xdata);
- return 0;
-}
-
-void
-wind_readdirp(call_frame_t *frame, xlator_t *xl, fd_t *fd, size_t size,
- off_t offset, dict_t *xdata)
-{
- xlator_t *this = THIS;
-
- if (!xl || (xl == this)) {
- xl = FIRST_CHILD(this);
- }
-
- STACK_WIND(frame, glupy_readdirp_cbk, xl, xl->fops->readdirp, fd, size,
- offset, xdata);
-}
-
-void
-unwind_readdirp(call_frame_t *frame, long cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, gf_dirent_t *entries,
- dict_t *xdata)
-{
- frame->local = NULL;
- STACK_UNWIND_STRICT(readdirp, frame, op_ret, op_errno, entries, xdata);
-}
-
-void
-set_readdirp_fop(long py_this, fop_readdirp_t fop)
-{
- glupy_private_t *priv = ((xlator_t *)py_this)->private;
-
- priv->fops[GLUPY_READDIRP] = (long)fop;
-}
-
-void
-set_readdirp_cbk(long py_this, fop_readdirp_cbk_t cbk)
-{
- glupy_private_t *priv = ((xlator_t *)py_this)->private;
-
- priv->cbks[GLUPY_READDIRP] = (long)cbk;
-}
-
-/* FOP:STAT */
-
-int32_t
-glupy_stat_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *buf,
- dict_t *xdata)
-{
- glupy_private_t *priv = this->private;
- PyGILState_STATE gstate;
- int32_t ret;
-
- if (!priv->cbks[GLUPY_STAT]) {
- goto unwind;
- }
-
- gstate = glupy_enter();
- ret = ((fop_stat_cbk_t)(priv->cbks[GLUPY_STAT]))(
- frame, cookie, this, op_ret, op_errno, buf, xdata);
- glupy_leave(gstate);
-
- return ret;
-
-unwind:
- frame->local = NULL;
- STACK_UNWIND_STRICT(stat, frame, op_ret, op_errno, buf, xdata);
- return 0;
-}
-
-int32_t
-glupy_stat(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
-{
- glupy_private_t *priv = this->private;
- PyGILState_STATE gstate;
- int32_t ret;
- static long next_id = 0;
-
- if (!priv->fops[GLUPY_STAT]) {
- goto wind;
- }
-
- gstate = glupy_enter();
- frame->local = (void *)++next_id;
- ret = ((fop_stat_t)(priv->fops[GLUPY_STAT]))(frame, this, loc, xdata);
- glupy_leave(gstate);
-
- return ret;
-
-wind:
- STACK_WIND(frame, glupy_stat_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->stat, loc, xdata);
- return 0;
-}
-
-void
-wind_stat(call_frame_t *frame, xlator_t *xl, loc_t *loc, dict_t *xdata)
-{
- xlator_t *this = THIS;
-
- if (!xl || (xl == this)) {
- xl = FIRST_CHILD(this);
- }
-
- STACK_WIND(frame, glupy_stat_cbk, xl, xl->fops->stat, loc, xdata);
-}
-
-void
-unwind_stat(call_frame_t *frame, long cookie, xlator_t *this, int32_t op_ret,
- int32_t op_errno, struct iatt *buf, dict_t *xdata)
-{
- frame->local = NULL;
- STACK_UNWIND_STRICT(stat, frame, op_ret, op_errno, buf, xdata);
-}
-
-void
-set_stat_fop(long py_this, fop_stat_t fop)
-{
- glupy_private_t *priv = ((xlator_t *)py_this)->private;
-
- priv->fops[GLUPY_STAT] = (long)fop;
-}
-
-void
-set_stat_cbk(long py_this, fop_stat_cbk_t cbk)
-{
- glupy_private_t *priv = ((xlator_t *)py_this)->private;
-
- priv->cbks[GLUPY_STAT] = (long)cbk;
-}
-
-/* FOP: FSTAT */
-
-int32_t
-glupy_fstat_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *buf,
- dict_t *xdata)
-{
- glupy_private_t *priv = this->private;
- PyGILState_STATE gstate;
- int32_t ret;
-
- if (!priv->cbks[GLUPY_FSTAT]) {
- goto unwind;
- }
-
- gstate = glupy_enter();
- ret = ((fop_fstat_cbk_t)(priv->cbks[GLUPY_FSTAT]))(
- frame, cookie, this, op_ret, op_errno, buf, xdata);
- glupy_leave(gstate);
-
- return ret;
-
-unwind:
- frame->local = NULL;
- STACK_UNWIND_STRICT(fstat, frame, op_ret, op_errno, buf, xdata);
- return 0;
-}
-
-int32_t
-glupy_fstat(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
-{
- glupy_private_t *priv = this->private;
- PyGILState_STATE gstate;
- int32_t ret;
- static long next_id = 0;
-
- if (!priv->fops[GLUPY_FSTAT]) {
- goto wind;
- }
-
- gstate = glupy_enter();
- frame->local = (void *)++next_id;
- ret = ((fop_fstat_t)(priv->fops[GLUPY_FSTAT]))(frame, this, fd, xdata);
- glupy_leave(gstate);
-
- return ret;
-
-wind:
- STACK_WIND(frame, glupy_fstat_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fstat, fd, xdata);
- return 0;
-}
-
-void
-wind_fstat(call_frame_t *frame, xlator_t *xl, fd_t *fd, dict_t *xdata)
-{
- xlator_t *this = THIS;
-
- if (!xl || (xl == this)) {
- xl = FIRST_CHILD(this);
- }
-
- STACK_WIND(frame, glupy_fstat_cbk, xl, xl->fops->fstat, fd, xdata);
-}
-
-void
-unwind_fstat(call_frame_t *frame, long cookie, xlator_t *this, int32_t op_ret,
- int32_t op_errno, struct iatt *buf, dict_t *xdata)
-{
- frame->local = NULL;
- STACK_UNWIND_STRICT(fstat, frame, op_ret, op_errno, buf, xdata);
-}
-
-void
-set_fstat_fop(long py_this, fop_fstat_t fop)
-{
- glupy_private_t *priv = ((xlator_t *)py_this)->private;
-
- priv->fops[GLUPY_FSTAT] = (long)fop;
-}
-
-void
-set_fstat_cbk(long py_this, fop_fstat_cbk_t cbk)
-{
- glupy_private_t *priv = ((xlator_t *)py_this)->private;
-
- priv->cbks[GLUPY_FSTAT] = (long)cbk;
-}
-
-/* FOP:STATFS */
-
-int32_t
-glupy_statfs_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct statvfs *buf,
- dict_t *xdata)
-{
- glupy_private_t *priv = this->private;
- PyGILState_STATE gstate;
- int32_t ret;
-
- if (!priv->cbks[GLUPY_STATFS]) {
- goto unwind;
- }
-
- gstate = glupy_enter();
- ret = ((fop_statfs_cbk_t)(priv->cbks[GLUPY_STATFS]))(
- frame, cookie, this, op_ret, op_errno, buf, xdata);
- glupy_leave(gstate);
-
- return ret;
-
-unwind:
- frame->local = NULL;
- STACK_UNWIND_STRICT(statfs, frame, op_ret, op_errno, buf, xdata);
- return 0;
-}
-
-int32_t
-glupy_statfs(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
-{
- glupy_private_t *priv = this->private;
- PyGILState_STATE gstate;
- int32_t ret;
- static long next_id = 0;
-
- if (!priv->fops[GLUPY_STATFS]) {
- goto wind;
- }
-
- gstate = glupy_enter();
- frame->local = (void *)++next_id;
- ret = ((fop_statfs_t)(priv->fops[GLUPY_STATFS]))(frame, this, loc, xdata);
- glupy_leave(gstate);
-
- return ret;
-
-wind:
- STACK_WIND(frame, glupy_statfs_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->statfs, loc, xdata);
- return 0;
-}
-
-void
-wind_statfs(call_frame_t *frame, xlator_t *xl, loc_t *loc, dict_t *xdata)
-{
- xlator_t *this = THIS;
-
- if (!xl || (xl == this)) {
- xl = FIRST_CHILD(this);
- }
-
- STACK_WIND(frame, glupy_statfs_cbk, xl, xl->fops->statfs, loc, xdata);
-}
-
-void
-unwind_statfs(call_frame_t *frame, long cookie, xlator_t *this, int32_t op_ret,
- int32_t op_errno, struct statvfs *buf, dict_t *xdata)
-{
- frame->local = NULL;
- STACK_UNWIND_STRICT(statfs, frame, op_ret, op_errno, buf, xdata);
-}
-
-void
-set_statfs_fop(long py_this, fop_statfs_t fop)
-{
- glupy_private_t *priv = ((xlator_t *)py_this)->private;
-
- priv->fops[GLUPY_STATFS] = (long)fop;
-}
-
-void
-set_statfs_cbk(long py_this, fop_statfs_cbk_t cbk)
-{
- glupy_private_t *priv = ((xlator_t *)py_this)->private;
-
- priv->cbks[GLUPY_STATFS] = (long)cbk;
-}
-
-/* FOP: SETXATTR */
-
-int32_t
-glupy_setxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
-{
- glupy_private_t *priv = this->private;
- PyGILState_STATE gstate;
- int32_t ret;
-
- if (!priv->cbks[GLUPY_SETXATTR]) {
- goto unwind;
- }
-
- gstate = glupy_enter();
- ret = ((fop_setxattr_cbk_t)(priv->cbks[GLUPY_SETXATTR]))(
- frame, cookie, this, op_ret, op_errno, xdata);
- glupy_leave(gstate);
-
- return ret;
-
-unwind:
- frame->local = NULL;
- STACK_UNWIND_STRICT(setxattr, frame, op_ret, op_errno, xdata);
- return 0;
-}
-
-int32_t
-glupy_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict,
- int32_t flags, dict_t *xdata)
-{
- glupy_private_t *priv = this->private;
- PyGILState_STATE gstate;
- int32_t ret;
- static long next_id = 0;
-
- if (!priv->fops[GLUPY_SETXATTR]) {
- goto wind;
- }
-
- gstate = glupy_enter();
- frame->local = (void *)++next_id;
- ret = ((fop_setxattr_t)(priv->fops[GLUPY_SETXATTR]))(frame, this, loc, dict,
- flags, xdata);
- glupy_leave(gstate);
-
- return ret;
-
-wind:
- STACK_WIND(frame, glupy_setxattr_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->setxattr, loc, dict, flags, xdata);
- return 0;
-}
-
-void
-wind_setxattr(call_frame_t *frame, xlator_t *xl, loc_t *loc, dict_t *dict,
- int32_t flags, dict_t *xdata)
-{
- xlator_t *this = THIS;
-
- if (!xl || (xl == this)) {
- xl = FIRST_CHILD(this);
- }
-
- STACK_WIND(frame, glupy_setxattr_cbk, xl, xl->fops->setxattr, loc, dict,
- flags, xdata);
-}
-
-void
-unwind_setxattr(call_frame_t *frame, long cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
-{
- frame->local = NULL;
- STACK_UNWIND_STRICT(setxattr, frame, op_ret, op_errno, xdata);
-}
-
-void
-set_setxattr_fop(long py_this, fop_setxattr_t fop)
-{
- glupy_private_t *priv = ((xlator_t *)py_this)->private;
-
- priv->fops[GLUPY_SETXATTR] = (long)fop;
-}
-
-void
-set_setxattr_cbk(long py_this, fop_setxattr_cbk_t cbk)
-{
- glupy_private_t *priv = ((xlator_t *)py_this)->private;
-
- priv->cbks[GLUPY_SETXATTR] = (long)cbk;
-}
-
-/* FOP: GETXATTR */
-
-int32_t
-glupy_getxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *dict,
- dict_t *xdata)
-{
- glupy_private_t *priv = this->private;
- PyGILState_STATE gstate;
- int32_t ret;
-
- if (!priv->cbks[GLUPY_GETXATTR]) {
- goto unwind;
- }
-
- gstate = glupy_enter();
- ret = ((fop_getxattr_cbk_t)(priv->cbks[GLUPY_GETXATTR]))(
- frame, cookie, this, op_ret, op_errno, dict, xdata);
- glupy_leave(gstate);
-
- return ret;
-
-unwind:
- frame->local = NULL;
- STACK_UNWIND_STRICT(getxattr, frame, op_ret, op_errno, dict, xdata);
- return 0;
-}
-
-int32_t
-glupy_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
- const char *name, dict_t *xdata)
-{
- glupy_private_t *priv = this->private;
- PyGILState_STATE gstate;
- int32_t ret;
- static long next_id = 0;
-
- if (!priv->fops[GLUPY_GETXATTR]) {
- goto wind;
- }
-
- gstate = glupy_enter();
- frame->local = (void *)++next_id;
- ret = ((fop_getxattr_t)(priv->fops[GLUPY_GETXATTR]))(frame, this, loc, name,
- xdata);
- glupy_leave(gstate);
-
- return ret;
-
-wind:
- STACK_WIND(frame, glupy_getxattr_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->getxattr, loc, name, xdata);
- return 0;
-}
-
-void
-wind_getxattr(call_frame_t *frame, xlator_t *xl, loc_t *loc, const char *name,
- dict_t *xdata)
-{
- xlator_t *this = THIS;
-
- if (!xl || (xl == this)) {
- xl = FIRST_CHILD(this);
- }
-
- STACK_WIND(frame, glupy_getxattr_cbk, xl, xl->fops->getxattr, loc, name,
- xdata);
-}
-
-void
-unwind_getxattr(call_frame_t *frame, long cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata)
-{
- frame->local = NULL;
- STACK_UNWIND_STRICT(getxattr, frame, op_ret, op_errno, dict, xdata);
-}
-
-void
-set_getxattr_fop(long py_this, fop_getxattr_t fop)
-{
- glupy_private_t *priv = ((xlator_t *)py_this)->private;
-
- priv->fops[GLUPY_GETXATTR] = (long)fop;
-}
-
-void
-set_getxattr_cbk(long py_this, fop_getxattr_cbk_t cbk)
-{
- glupy_private_t *priv = ((xlator_t *)py_this)->private;
-
- priv->cbks[GLUPY_GETXATTR] = (long)cbk;
-}
-
-/* FOP: FSETXATTR */
-
-int32_t
-glupy_fsetxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
-{
- glupy_private_t *priv = this->private;
- PyGILState_STATE gstate;
- int32_t ret;
-
- if (!priv->cbks[GLUPY_FSETXATTR]) {
- goto unwind;
- }
-
- gstate = glupy_enter();
- ret = ((fop_fsetxattr_cbk_t)(priv->cbks[GLUPY_FSETXATTR]))(
- frame, cookie, this, op_ret, op_errno, xdata);
- glupy_leave(gstate);
-
- return ret;
-
-unwind:
- frame->local = NULL;
- STACK_UNWIND_STRICT(fsetxattr, frame, op_ret, op_errno, xdata);
- return 0;
-}
-
-int32_t
-glupy_fsetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict,
- int32_t flags, dict_t *xdata)
-{
- glupy_private_t *priv = this->private;
- PyGILState_STATE gstate;
- int32_t ret;
- static long next_id = 0;
-
- if (!priv->fops[GLUPY_FSETXATTR]) {
- goto wind;
- }
-
- gstate = glupy_enter();
- frame->local = (void *)++next_id;
- ret = ((fop_fsetxattr_t)(priv->fops[GLUPY_FSETXATTR]))(frame, this, fd,
- dict, flags, xdata);
- glupy_leave(gstate);
-
- return ret;
-
-wind:
- STACK_WIND(frame, glupy_fsetxattr_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fsetxattr, fd, dict, flags, xdata);
- return 0;
-}
-
-void
-wind_fsetxattr(call_frame_t *frame, xlator_t *xl, fd_t *fd, dict_t *dict,
- int32_t flags, dict_t *xdata)
-{
- xlator_t *this = THIS;
-
- if (!xl || (xl == this)) {
- xl = FIRST_CHILD(this);
- }
-
- STACK_WIND(frame, glupy_fsetxattr_cbk, xl, xl->fops->fsetxattr, fd, dict,
- flags, xdata);
-}
-
-void
-unwind_fsetxattr(call_frame_t *frame, long cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
-{
- frame->local = NULL;
- STACK_UNWIND_STRICT(fsetxattr, frame, op_ret, op_errno, xdata);
-}
-
-void
-set_fsetxattr_fop(long py_this, fop_fsetxattr_t fop)
-{
- glupy_private_t *priv = ((xlator_t *)py_this)->private;
-
- priv->fops[GLUPY_FSETXATTR] = (long)fop;
-}
-
-void
-set_fsetxattr_cbk(long py_this, fop_fsetxattr_cbk_t cbk)
-{
- glupy_private_t *priv = ((xlator_t *)py_this)->private;
-
- priv->cbks[GLUPY_FSETXATTR] = (long)cbk;
-}
-
-/* FOP: FGETXATTR */
-
-int32_t
-glupy_fgetxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *dict,
- dict_t *xdata)
-{
- glupy_private_t *priv = this->private;
- PyGILState_STATE gstate;
- int32_t ret;
-
- if (!priv->cbks[GLUPY_FGETXATTR]) {
- goto unwind;
- }
-
- gstate = glupy_enter();
- ret = ((fop_fgetxattr_cbk_t)(priv->cbks[GLUPY_FGETXATTR]))(
- frame, cookie, this, op_ret, op_errno, dict, xdata);
- glupy_leave(gstate);
-
- return ret;
-
-unwind:
- frame->local = NULL;
- STACK_UNWIND_STRICT(fgetxattr, frame, op_ret, op_errno, dict, xdata);
- return 0;
-}
-
-int32_t
-glupy_fgetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, const char *name,
- dict_t *xdata)
-{
- glupy_private_t *priv = this->private;
- PyGILState_STATE gstate;
- int32_t ret;
- static long next_id = 0;
-
- if (!priv->fops[GLUPY_FGETXATTR]) {
- goto wind;
- }
-
- gstate = glupy_enter();
- frame->local = (void *)++next_id;
- ret = ((fop_fgetxattr_t)(priv->fops[GLUPY_FGETXATTR]))(frame, this, fd,
- name, xdata);
- glupy_leave(gstate);
-
- return ret;
-
-wind:
- STACK_WIND(frame, glupy_fgetxattr_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fgetxattr, fd, name, xdata);
- return 0;
-}
-
-void
-wind_fgetxattr(call_frame_t *frame, xlator_t *xl, fd_t *fd, const char *name,
- dict_t *xdata)
-{
- xlator_t *this = THIS;
-
- if (!xl || (xl == this)) {
- xl = FIRST_CHILD(this);
- }
-
- STACK_WIND(frame, glupy_fgetxattr_cbk, xl, xl->fops->fgetxattr, fd, name,
- xdata);
-}
-
-void
-unwind_fgetxattr(call_frame_t *frame, long cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata)
-{
- frame->local = NULL;
- STACK_UNWIND_STRICT(fgetxattr, frame, op_ret, op_errno, dict, xdata);
-}
-
-void
-set_fgetxattr_fop(long py_this, fop_fgetxattr_t fop)
-{
- glupy_private_t *priv = ((xlator_t *)py_this)->private;
-
- priv->fops[GLUPY_FGETXATTR] = (long)fop;
-}
-
-void
-set_fgetxattr_cbk(long py_this, fop_fgetxattr_cbk_t cbk)
-{
- glupy_private_t *priv = ((xlator_t *)py_this)->private;
-
- priv->cbks[GLUPY_FGETXATTR] = (long)cbk;
-}
-
-/* FOP:REMOVEXATTR */
-
-int32_t
-glupy_removexattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
-{
- glupy_private_t *priv = this->private;
- PyGILState_STATE gstate;
- int32_t ret;
-
- if (!priv->cbks[GLUPY_REMOVEXATTR]) {
- goto unwind;
- }
-
- gstate = glupy_enter();
- ret = ((fop_removexattr_cbk_t)(priv->cbks[GLUPY_REMOVEXATTR]))(
- frame, cookie, this, op_ret, op_errno, xdata);
- glupy_leave(gstate);
-
- return ret;
-
-unwind:
- frame->local = NULL;
- STACK_UNWIND_STRICT(removexattr, frame, op_ret, op_errno, xdata);
- return 0;
-}
-
-int32_t
-glupy_removexattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
- const char *name, dict_t *xdata)
-{
- glupy_private_t *priv = this->private;
- PyGILState_STATE gstate;
- int32_t ret;
- static long next_id = 0;
-
- if (!priv->fops[GLUPY_REMOVEXATTR]) {
- goto wind;
- }
-
- gstate = glupy_enter();
- frame->local = (void *)++next_id;
- ret = ((fop_removexattr_t)(priv->fops[GLUPY_REMOVEXATTR]))(frame, this, loc,
- name, xdata);
- glupy_leave(gstate);
-
- return ret;
-
-wind:
- STACK_WIND(frame, glupy_removexattr_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->removexattr, loc, name, xdata);
- return 0;
-}
-
-void
-wind_removexattr(call_frame_t *frame, xlator_t *xl, loc_t *loc,
- const char *name, dict_t *xdata)
-{
- xlator_t *this = THIS;
-
- if (!xl || (xl == this)) {
- xl = FIRST_CHILD(this);
- }
-
- STACK_WIND(frame, glupy_removexattr_cbk, xl, xl->fops->removexattr, loc,
- name, xdata);
-}
-
-void
-unwind_removexattr(call_frame_t *frame, long cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
-{
- frame->local = NULL;
- STACK_UNWIND_STRICT(removexattr, frame, op_ret, op_errno, xdata);
-}
-
-void
-set_removexattr_fop(long py_this, fop_removexattr_t fop)
-{
- glupy_private_t *priv = ((xlator_t *)py_this)->private;
-
- priv->fops[GLUPY_REMOVEXATTR] = (long)fop;
-}
-
-void
-set_removexattr_cbk(long py_this, fop_removexattr_cbk_t cbk)
-{
- glupy_private_t *priv = ((xlator_t *)py_this)->private;
-
- priv->cbks[GLUPY_REMOVEXATTR] = (long)cbk;
-}
-
-/* FOP:FREMOVEXATTR */
-
-int32_t
-glupy_fremovexattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
-{
- glupy_private_t *priv = this->private;
- PyGILState_STATE gstate;
- int32_t ret;
-
- if (!priv->cbks[GLUPY_FREMOVEXATTR]) {
- goto unwind;
- }
-
- gstate = glupy_enter();
- ret = ((fop_fremovexattr_cbk_t)(priv->cbks[GLUPY_FREMOVEXATTR]))(
- frame, cookie, this, op_ret, op_errno, xdata);
- glupy_leave(gstate);
-
- return ret;
-
-unwind:
- frame->local = NULL;
- STACK_UNWIND_STRICT(fremovexattr, frame, op_ret, op_errno, xdata);
- return 0;
-}
-
-int32_t
-glupy_fremovexattr(call_frame_t *frame, xlator_t *this, fd_t *fd,
- const char *name, dict_t *xdata)
-{
- glupy_private_t *priv = this->private;
- PyGILState_STATE gstate;
- int32_t ret;
- static long next_id = 0;
-
- if (!priv->fops[GLUPY_FREMOVEXATTR]) {
- goto wind;
- }
-
- gstate = glupy_enter();
- frame->local = (void *)++next_id;
- ret = ((fop_fremovexattr_t)(priv->fops[GLUPY_FREMOVEXATTR]))(
- frame, this, fd, name, xdata);
- glupy_leave(gstate);
-
- return ret;
-
-wind:
- STACK_WIND(frame, glupy_fremovexattr_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fremovexattr, fd, name, xdata);
- return 0;
-}
-
-void
-wind_fremovexattr(call_frame_t *frame, xlator_t *xl, fd_t *fd, const char *name,
- dict_t *xdata)
-{
- xlator_t *this = THIS;
-
- if (!xl || (xl == this)) {
- xl = FIRST_CHILD(this);
- }
-
- STACK_WIND(frame, glupy_fremovexattr_cbk, xl, xl->fops->fremovexattr, fd,
- name, xdata);
-}
-
-void
-unwind_fremovexattr(call_frame_t *frame, long cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
-{
- frame->local = NULL;
- STACK_UNWIND_STRICT(fremovexattr, frame, op_ret, op_errno, xdata);
-}
-
-void
-set_fremovexattr_fop(long py_this, fop_fremovexattr_t fop)
-{
- glupy_private_t *priv = ((xlator_t *)py_this)->private;
-
- priv->fops[GLUPY_FREMOVEXATTR] = (long)fop;
-}
-
-void
-set_fremovexattr_cbk(long py_this, fop_fremovexattr_cbk_t cbk)
-{
- glupy_private_t *priv = ((xlator_t *)py_this)->private;
-
- priv->cbks[GLUPY_FREMOVEXATTR] = (long)cbk;
-}
-
-/* FOP: LINK*/
-int32_t
-glupy_link_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
-{
- glupy_private_t *priv = this->private;
- PyGILState_STATE gstate;
- int32_t ret;
-
- if (!priv->cbks[GLUPY_LINK]) {
- goto unwind;
- }
-
- gstate = glupy_enter();
- ret = ((fop_link_cbk_t)(priv->cbks[GLUPY_LINK]))(
- frame, cookie, this, op_ret, op_errno, inode, buf, preparent,
- postparent, xdata);
- glupy_leave(gstate);
-
- return ret;
-
-unwind:
- frame->local = NULL;
- STACK_UNWIND_STRICT(link, frame, op_ret, op_errno, inode, buf, preparent,
- postparent, xdata);
- return 0;
-}
-
-int32_t
-glupy_link(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
- dict_t *xdata)
-{
- glupy_private_t *priv = this->private;
- PyGILState_STATE gstate;
- int32_t ret;
- static long next_id = 0;
-
- if (!priv->fops[GLUPY_LINK]) {
- goto wind;
- }
-
- gstate = glupy_enter();
- frame->local = (void *)++next_id;
- ret = ((fop_link_t)(priv->fops[GLUPY_LINK]))(frame, this, oldloc, newloc,
- xdata);
- glupy_leave(gstate);
-
- return ret;
-
-wind:
- STACK_WIND(frame, glupy_link_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->link, oldloc, newloc, xdata);
- return 0;
-}
-
-void
-wind_link(call_frame_t *frame, xlator_t *xl, loc_t *oldloc, loc_t *newloc,
- dict_t *xdata)
-{
- xlator_t *this = THIS;
-
- if (!xl || (xl == this)) {
- xl = FIRST_CHILD(this);
- }
-
- STACK_WIND(frame, glupy_link_cbk, xl, xl->fops->link, oldloc, newloc,
- xdata);
-}
-
-void
-unwind_link(call_frame_t *frame, long cookie, xlator_t *this, int32_t op_ret,
- int32_t op_errno, inode_t *inode, struct iatt *buf,
- struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
-{
- frame->local = NULL;
- STACK_UNWIND_STRICT(link, frame, op_ret, op_errno, inode, buf, preparent,
- postparent, xdata);
-}
-
-void
-set_link_fop(long py_this, fop_link_t fop)
-{
- glupy_private_t *priv = ((xlator_t *)py_this)->private;
-
- priv->fops[GLUPY_LINK] = (long)fop;
-}
-
-void
-set_link_cbk(long py_this, fop_link_cbk_t cbk)
-{
- glupy_private_t *priv = ((xlator_t *)py_this)->private;
-
- priv->cbks[GLUPY_LINK] = (long)cbk;
-}
-
-/* FOP: SYMLINK*/
-int32_t
-glupy_symlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
-{
- glupy_private_t *priv = this->private;
- PyGILState_STATE gstate;
- int32_t ret;
-
- if (!priv->cbks[GLUPY_SYMLINK]) {
- goto unwind;
- }
-
- gstate = glupy_enter();
- ret = ((fop_symlink_cbk_t)(priv->cbks[GLUPY_SYMLINK]))(
- frame, cookie, this, op_ret, op_errno, inode, buf, preparent,
- postparent, xdata);
- glupy_leave(gstate);
-
- return ret;
-
-unwind:
- frame->local = NULL;
- STACK_UNWIND_STRICT(symlink, frame, op_ret, op_errno, inode, buf, preparent,
- postparent, xdata);
- return 0;
-}
-
-int32_t
-glupy_symlink(call_frame_t *frame, xlator_t *this, const char *linkname,
- loc_t *loc, mode_t umask, dict_t *xdata)
-{
- glupy_private_t *priv = this->private;
- PyGILState_STATE gstate;
- int32_t ret;
- static long next_id = 0;
-
- if (!priv->fops[GLUPY_SYMLINK]) {
- goto wind;
- }
-
- gstate = glupy_enter();
- frame->local = (void *)++next_id;
- ret = ((fop_symlink_t)(priv->fops[GLUPY_SYMLINK]))(frame, this, linkname,
- loc, umask, xdata);
- glupy_leave(gstate);
-
- return ret;
-
-wind:
- STACK_WIND(frame, glupy_symlink_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->symlink, linkname, loc, umask, xdata);
- return 0;
-}
-
-void
-wind_symlink(call_frame_t *frame, xlator_t *xl, const char *linkname,
- loc_t *loc, mode_t umask, dict_t *xdata)
-{
- xlator_t *this = THIS;
-
- if (!xl || (xl == this)) {
- xl = FIRST_CHILD(this);
- }
-
- STACK_WIND(frame, glupy_symlink_cbk, xl, xl->fops->symlink, linkname, loc,
- umask, xdata);
-}
-
-void
-unwind_symlink(call_frame_t *frame, long cookie, xlator_t *this, int32_t op_ret,
- int32_t op_errno, inode_t *inode, struct iatt *buf,
- struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
-{
- frame->local = NULL;
- STACK_UNWIND_STRICT(symlink, frame, op_ret, op_errno, inode, buf, preparent,
- postparent, xdata);
-}
-
-void
-set_symlink_fop(long py_this, fop_symlink_t fop)
-{
- glupy_private_t *priv = ((xlator_t *)py_this)->private;
-
- priv->fops[GLUPY_SYMLINK] = (long)fop;
-}
-
-void
-set_symlink_cbk(long py_this, fop_symlink_cbk_t cbk)
-{
- glupy_private_t *priv = ((xlator_t *)py_this)->private;
-
- priv->cbks[GLUPY_SYMLINK] = (long)cbk;
-}
-
-/* FOP: READLINK */
-int32_t
-glupy_readlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, const char *path,
- struct iatt *buf, dict_t *xdata)
-{
- glupy_private_t *priv = this->private;
- PyGILState_STATE gstate;
- int32_t ret;
-
- if (!priv->cbks[GLUPY_READLINK]) {
- goto unwind;
- }
-
- gstate = glupy_enter();
- ret = ((fop_readlink_cbk_t)(priv->cbks[GLUPY_READLINK]))(
- frame, cookie, this, op_ret, op_errno, path, buf, xdata);
- glupy_leave(gstate);
-
- return ret;
-
-unwind:
- frame->local = NULL;
- STACK_UNWIND_STRICT(readlink, frame, op_ret, op_errno, path, buf, xdata);
- return 0;
-}
-
-int32_t
-glupy_readlink(call_frame_t *frame, xlator_t *this, loc_t *loc, size_t size,
- dict_t *xdata)
-{
- glupy_private_t *priv = this->private;
- PyGILState_STATE gstate;
- int32_t ret;
- static long next_id = 0;
-
- if (!priv->fops[GLUPY_READLINK]) {
- goto wind;
- }
-
- gstate = glupy_enter();
- frame->local = (void *)++next_id;
- ret = ((fop_readlink_t)(priv->fops[GLUPY_READLINK]))(frame, this, loc, size,
- xdata);
- glupy_leave(gstate);
-
- return ret;
-
-wind:
- STACK_WIND(frame, glupy_readlink_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->readlink, loc, size, xdata);
- return 0;
-}
-
-void
-wind_readlink(call_frame_t *frame, xlator_t *xl, loc_t *loc, size_t size,
- dict_t *xdata)
-{
- xlator_t *this = THIS;
-
- if (!xl || (xl == this)) {
- xl = FIRST_CHILD(this);
- }
-
- STACK_WIND(frame, glupy_readlink_cbk, xl, xl->fops->readlink, loc, size,
- xdata);
-}
-
-void
-unwind_readlink(call_frame_t *frame, long cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, const char *path,
- struct iatt *buf, dict_t *xdata)
-{
- frame->local = NULL;
- STACK_UNWIND_STRICT(readlink, frame, op_ret, op_errno, path, buf, xdata);
-}
-
-void
-set_readlink_fop(long py_this, fop_readlink_t fop)
-{
- glupy_private_t *priv = ((xlator_t *)py_this)->private;
-
- priv->fops[GLUPY_READLINK] = (long)fop;
-}
-
-void
-set_readlink_cbk(long py_this, fop_readlink_cbk_t cbk)
-{
- glupy_private_t *priv = ((xlator_t *)py_this)->private;
-
- priv->cbks[GLUPY_READLINK] = (long)cbk;
-}
-
-/* FOP: UNLINK */
-
-int32_t
-glupy_unlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
-{
- glupy_private_t *priv = this->private;
- PyGILState_STATE gstate;
- int32_t ret;
-
- if (!priv->cbks[GLUPY_UNLINK]) {
- goto unwind;
- }
-
- gstate = glupy_enter();
- ret = ((fop_unlink_cbk_t)(priv->cbks[GLUPY_UNLINK]))(
- frame, cookie, this, op_ret, op_errno, preparent, postparent, xdata);
- glupy_leave(gstate);
-
- return ret;
-
-unwind:
- frame->local = NULL;
- STACK_UNWIND_STRICT(unlink, frame, op_ret, op_errno, preparent, postparent,
- xdata);
- return 0;
-}
-
-int32_t
-glupy_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflags,
- dict_t *xdata)
-{
- glupy_private_t *priv = this->private;
- PyGILState_STATE gstate;
- int32_t ret;
- static long next_id = 0;
-
- if (!priv->fops[GLUPY_UNLINK]) {
- goto wind;
- }
-
- gstate = glupy_enter();
- frame->local = (void *)++next_id;
- ret = ((fop_unlink_t)(priv->fops[GLUPY_UNLINK]))(frame, this, loc, xflags,
- xdata);
- glupy_leave(gstate);
-
- return ret;
-
-wind:
- STACK_WIND(frame, glupy_unlink_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->unlink, loc, xflags, xdata);
- return 0;
-}
-
-void
-wind_unlink(call_frame_t *frame, xlator_t *xl, loc_t *loc, int xflags,
- dict_t *xdata)
-{
- xlator_t *this = THIS;
-
- if (!xl || (xl == this)) {
- xl = FIRST_CHILD(this);
- }
-
- STACK_WIND(frame, glupy_unlink_cbk, xl, xl->fops->unlink, loc, xflags,
- xdata);
-}
-
-void
-unwind_unlink(call_frame_t *frame, long cookie, xlator_t *this, int32_t op_ret,
- int32_t op_errno, struct iatt *preparent, struct iatt *postparent,
- dict_t *xdata)
-{
- frame->local = NULL;
- STACK_UNWIND_STRICT(unlink, frame, op_ret, op_errno, preparent, postparent,
- xdata);
-}
-
-void
-set_unlink_fop(long py_this, fop_unlink_t fop)
-{
- glupy_private_t *priv = ((xlator_t *)py_this)->private;
-
- priv->fops[GLUPY_UNLINK] = (long)fop;
-}
-
-void
-set_unlink_cbk(long py_this, fop_unlink_cbk_t cbk)
-{
- glupy_private_t *priv = ((xlator_t *)py_this)->private;
-
- priv->cbks[GLUPY_UNLINK] = (long)cbk;
-}
-
-/* FOP: MKDIR */
-
-int32_t
-glupy_mkdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
-{
- glupy_private_t *priv = this->private;
- PyGILState_STATE gstate;
- int32_t ret;
-
- if (!priv->cbks[GLUPY_MKDIR]) {
- goto unwind;
- }
-
- gstate = glupy_enter();
- ret = ((fop_mkdir_cbk_t)(priv->cbks[GLUPY_MKDIR]))(
- frame, cookie, this, op_ret, op_errno, inode, buf, preparent,
- postparent, xdata);
- glupy_leave(gstate);
-
- return ret;
-
-unwind:
- frame->local = NULL;
- STACK_UNWIND_STRICT(mkdir, frame, op_ret, op_errno, inode, buf, preparent,
- postparent, xdata);
- return 0;
-}
-
-int32_t
-glupy_mkdir(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
- mode_t umask, dict_t *xdata)
-{
- glupy_private_t *priv = this->private;
- PyGILState_STATE gstate;
- int32_t ret;
- static long next_id = 0;
-
- if (!priv->fops[GLUPY_MKDIR]) {
- goto wind;
- }
-
- gstate = glupy_enter();
- frame->local = (void *)++next_id;
- ret = ((fop_mkdir_t)(priv->fops[GLUPY_MKDIR]))(frame, this, loc, mode,
- umask, xdata);
- glupy_leave(gstate);
-
- return ret;
-
-wind:
- STACK_WIND(frame, glupy_mkdir_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->mkdir, loc, mode, umask, xdata);
- return 0;
-}
-
-void
-wind_mkdir(call_frame_t *frame, xlator_t *xl, loc_t *loc, mode_t mode,
- mode_t umask, dict_t *xdata)
-{
- xlator_t *this = THIS;
-
- if (!xl || (xl == this)) {
- xl = FIRST_CHILD(this);
- }
-
- STACK_WIND(frame, glupy_mkdir_cbk, xl, xl->fops->mkdir, loc, mode, umask,
- xdata);
-}
-
-void
-unwind_mkdir(call_frame_t *frame, long cookie, xlator_t *this, int32_t op_ret,
- int32_t op_errno, inode_t *inode, struct iatt *buf,
- struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
-{
- frame->local = NULL;
- STACK_UNWIND_STRICT(mkdir, frame, op_ret, op_errno, inode, buf, preparent,
- postparent, xdata);
-}
-
-void
-set_mkdir_fop(long py_this, fop_mkdir_t fop)
-{
- glupy_private_t *priv = ((xlator_t *)py_this)->private;
-
- priv->fops[GLUPY_MKDIR] = (long)fop;
-}
-
-void
-set_mkdir_cbk(long py_this, fop_mkdir_cbk_t cbk)
-{
- glupy_private_t *priv = ((xlator_t *)py_this)->private;
-
- priv->cbks[GLUPY_MKDIR] = (long)cbk;
-}
-
-/* FOP: RMDIR */
-
-int32_t
-glupy_rmdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
-{
- glupy_private_t *priv = this->private;
- PyGILState_STATE gstate;
- int32_t ret;
-
- if (!priv->cbks[GLUPY_RMDIR]) {
- goto unwind;
- }
-
- gstate = glupy_enter();
- ret = ((fop_rmdir_cbk_t)(priv->cbks[GLUPY_RMDIR]))(
- frame, cookie, this, op_ret, op_errno, preparent, postparent, xdata);
- glupy_leave(gstate);
-
- return ret;
-
-unwind:
- frame->local = NULL;
- STACK_UNWIND_STRICT(rmdir, frame, op_ret, op_errno, preparent, postparent,
- xdata);
- return 0;
-}
-
-int32_t
-glupy_rmdir(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflags,
- dict_t *xdata)
-{
- glupy_private_t *priv = this->private;
- PyGILState_STATE gstate;
- int32_t ret;
- static long next_id = 0;
-
- if (!priv->fops[GLUPY_RMDIR]) {
- goto wind;
- }
-
- gstate = glupy_enter();
- frame->local = (void *)++next_id;
- ret = ((fop_rmdir_t)(priv->fops[GLUPY_RMDIR]))(frame, this, loc, xflags,
- xdata);
- glupy_leave(gstate);
-
- return ret;
-
-wind:
- STACK_WIND(frame, glupy_rmdir_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->rmdir, loc, xflags, xdata);
- return 0;
-}
-
-void
-wind_rmdir(call_frame_t *frame, xlator_t *xl, loc_t *loc, int xflags,
- dict_t *xdata)
-{
- xlator_t *this = THIS;
-
- if (!xl || (xl == this)) {
- xl = FIRST_CHILD(this);
- }
-
- STACK_WIND(frame, glupy_rmdir_cbk, xl, xl->fops->rmdir, loc, xflags, xdata);
-}
-
-void
-unwind_rmdir(call_frame_t *frame, long cookie, xlator_t *this, int32_t op_ret,
- int32_t op_errno, struct iatt *preparent, struct iatt *postparent,
- dict_t *xdata)
-{
- frame->local = NULL;
- STACK_UNWIND_STRICT(rmdir, frame, op_ret, op_errno, preparent, postparent,
- xdata);
-}
-
-void
-set_rmdir_fop(long py_this, fop_rmdir_t fop)
-{
- glupy_private_t *priv = ((xlator_t *)py_this)->private;
-
- priv->fops[GLUPY_RMDIR] = (long)fop;
-}
-
-void
-set_rmdir_cbk(long py_this, fop_rmdir_cbk_t cbk)
-{
- glupy_private_t *priv = ((xlator_t *)py_this)->private;
-
- priv->cbks[GLUPY_RMDIR] = (long)cbk;
-}
-
-/* NON-FOP-SPECIFIC CODE */
-
-long
-get_id(call_frame_t *frame)
-{
- return (long)(frame->local);
-}
-
-uint64_t
-get_rootunique(call_frame_t *frame)
-{
- return frame->root->unique;
-}
-
-int32_t
-mem_acct_init(xlator_t *this)
-{
- int ret = -1;
-
- if (!this)
- return ret;
-
- ret = xlator_mem_acct_init(this, gf_glupy_mt_end);
-
- if (ret != 0) {
- gf_log(this->name, GF_LOG_ERROR,
- "Memory accounting init"
- " failed");
- return ret;
- }
-
- return ret;
-}
-
-static void
-py_error_log(const char *name, PyObject *pystr)
-{
-#if PY_MAJOR_VERSION > 2
- char scr[256];
- if (PyUnicode_Check(pystr)) {
- PyObject *tmp = PyUnicode_AsEncodedString(pystr, "UTF-8", "strict");
- if (tmp != NULL) {
- strncpy(scr, PyBytes_AS_STRING(pystr), sizeof(scr));
- Py_DECREF(tmp);
- } else {
- strncpy(scr, "string encoding error", sizeof(scr));
- }
- } else if (PyBytes_Check(pystr)) {
- strncpy(scr, PyBytes_AS_STRING(pystr), sizeof(scr));
- } else {
- strncpy(scr, "string encoding error", sizeof(scr));
- }
- gf_log(name, GF_LOG_ERROR, "Python error: %s", scr);
-#else
- gf_log(name, GF_LOG_ERROR, "Python error: %s", PyString_AsString(pystr));
-#endif
-}
-
-static PyObject *
-encode(const char *str)
-{
-#if PY_MAJOR_VERSION > 2
- return PyUnicode_FromString(str);
-#else
- return PyString_FromString(str);
-#endif
-}
-
-int32_t
-init(xlator_t *this)
-{
- glupy_private_t *priv = NULL;
- char *module_name = NULL;
- PyObject *py_mod_name = NULL;
- PyObject *py_init_func = NULL;
- PyObject *py_args = NULL;
- PyObject *syspath = NULL;
- PyObject *path = NULL;
- PyObject *error_type = NULL;
- PyObject *error_msg = NULL;
- PyObject *error_bt = NULL;
- static gf_boolean_t py_inited = _gf_false;
- void *err_cleanup = &&err_return;
- char libpython[16];
-
- if (dict_get_str(this->options, "module-name", &module_name) != 0) {
- gf_log(this->name, GF_LOG_ERROR, "missing module-name");
- return -1;
- }
-
- priv = GF_CALLOC(1, sizeof(glupy_private_t), gf_glupy_mt_priv);
- if (!priv) {
- goto *err_cleanup;
- }
- this->private = priv;
- err_cleanup = &&err_free_priv;
-
- if (!py_inited) {
- /* FIXME:
- * This hack is necessary because glusterfs (rightly) loads
- * glupy.so with RTLD_LOCAL but glupy needs libpython to be
- * loaded with RTLD_GLOBAL even though glupy is correctly
- * linked with libpython.
- * This is needed because one of the internal modules of
- * python 2.x (lib-dynload/_struct.so) does not explicitly
- * link with libpython.
- */
- snprintf(libpython, sizeof(libpython), "libpython%d.%d.so",
- PY_MAJOR_VERSION, PY_MINOR_VERSION);
- if (!dlopen(libpython, RTLD_NOW | RTLD_GLOBAL)) {
- gf_msg(this->name, GF_LOG_WARNING, 0, LG_MSG_DLOPEN_FAILED,
- "dlopen(%s) failed: %s", libpython, dlerror());
- }
-
- /*
- * This must be done before Py_Initialize(),
- * because it will duplicate the environment,
- * and fail to see later environment updates.
- */
- setenv("PATH_GLUSTERFS_GLUPY_MODULE", PATH_GLUSTERFS_GLUPY_MODULE, 1);
-
- Py_Initialize();
- PyEval_InitThreads();
-
- (void)pthread_key_create(&gil_init_key, NULL);
- (void)pthread_setspecific(gil_init_key, (void *)1);
-
- /* PyEval_InitThreads takes this "for" us. No thanks. */
- PyEval_ReleaseLock();
- py_inited = _gf_true;
- }
-
- /* Adjust python's path */
- syspath = PySys_GetObject("path");
- path = encode(GLUSTER_PYTHON_PATH);
- PyList_Append(syspath, path);
- Py_DECREF(path);
-
- py_mod_name = encode(module_name);
- if (!py_mod_name) {
- gf_log(this->name, GF_LOG_ERROR, "could not create name");
- if (PyErr_Occurred()) {
- PyErr_Fetch(&error_type, &error_msg, &error_bt);
- py_error_log(this->name, error_msg);
- }
- goto *err_cleanup;
- }
-
- gf_log(this->name, GF_LOG_DEBUG, "py_mod_name = %s", module_name);
- priv->py_module = PyImport_Import(py_mod_name);
- Py_DECREF(py_mod_name);
- if (!priv->py_module) {
- gf_log(this->name, GF_LOG_ERROR, "Python import of %s failed",
- module_name);
- if (PyErr_Occurred()) {
- PyErr_Fetch(&error_type, &error_msg, &error_bt);
- py_error_log(this->name, error_msg);
- }
- goto *err_cleanup;
- }
- gf_log(this->name, GF_LOG_INFO, "Import of %s succeeded", module_name);
- err_cleanup = &&err_deref_module;
-
- py_init_func = PyObject_GetAttrString(priv->py_module, "xlator");
- if (!py_init_func || !PyCallable_Check(py_init_func)) {
- gf_log(this->name, GF_LOG_ERROR, "missing init func");
- if (PyErr_Occurred()) {
- PyErr_Fetch(&error_type, &error_msg, &error_bt);
- py_error_log(this->name, error_msg);
- }
- goto *err_cleanup;
- }
- err_cleanup = &&err_deref_init;
-
- py_args = PyTuple_New(1);
- if (!py_args) {
- gf_log(this->name, GF_LOG_ERROR, "could not create args");
- if (PyErr_Occurred()) {
- PyErr_Fetch(&error_type, &error_msg, &error_bt);
- py_error_log(this->name, error_msg);
- }
- goto *err_cleanup;
- }
- PyTuple_SetItem(py_args, 0, PyLong_FromLong((long)this));
-
- /* TBD: pass in list of children */
- priv->py_xlator = PyObject_CallObject(py_init_func, py_args);
- Py_DECREF(py_args);
- if (!priv->py_xlator) {
- gf_log(this->name, GF_LOG_ERROR, "Python init failed");
- if (PyErr_Occurred()) {
- PyErr_Fetch(&error_type, &error_msg, &error_bt);
- py_error_log(this->name, error_msg);
- }
- goto *err_cleanup;
- }
- gf_log(this->name, GF_LOG_DEBUG, "init returned %p", priv->py_xlator);
-
- return 0;
-
-err_deref_init:
- Py_DECREF(py_init_func);
-err_deref_module:
- Py_DECREF(priv->py_module);
-err_free_priv:
- GF_FREE(priv);
-err_return:
- return -1;
-}
-
-void
-fini(xlator_t *this)
-{
- glupy_private_t *priv = this->private;
-
- if (!priv)
- return;
- Py_DECREF(priv->py_xlator);
- Py_DECREF(priv->py_module);
- this->private = NULL;
- GF_FREE(priv);
-
- return;
-}
-
-struct xlator_fops fops = {.lookup = glupy_lookup,
- .create = glupy_create,
- .open = glupy_open,
- .readv = glupy_readv,
- .writev = glupy_writev,
- .opendir = glupy_opendir,
- .readdir = glupy_readdir,
- .stat = glupy_stat,
- .fstat = glupy_fstat,
- .setxattr = glupy_setxattr,
- .getxattr = glupy_getxattr,
- .fsetxattr = glupy_fsetxattr,
- .fgetxattr = glupy_fgetxattr,
- .removexattr = glupy_removexattr,
- .fremovexattr = glupy_fremovexattr,
- .link = glupy_link,
- .unlink = glupy_unlink,
- .readlink = glupy_readlink,
- .symlink = glupy_symlink,
- .mkdir = glupy_mkdir,
- .rmdir = glupy_rmdir,
- .statfs = glupy_statfs,
- .readdirp = glupy_readdirp};
-
-struct xlator_cbks cbks = {};
-
-struct volume_options options[] = {
- {.key = {NULL}},
-};
diff --git a/xlators/features/glupy/src/glupy.h b/xlators/features/glupy/src/glupy.h
deleted file mode 100644
index 851b02154d2..00000000000
--- a/xlators/features/glupy/src/glupy.h
+++ /dev/null
@@ -1,56 +0,0 @@
-/*
- Copyright (c) 2006-2014 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-#ifndef __GLUPY_H__
-#define __GLUPY_H__
-
-#include "mem-types.h"
-
-enum {
- GLUPY_LOOKUP = 0,
- GLUPY_CREATE,
- GLUPY_OPEN,
- GLUPY_READV,
- GLUPY_WRITEV,
- GLUPY_OPENDIR,
- GLUPY_READDIR,
- GLUPY_READDIRP,
- GLUPY_STAT,
- GLUPY_FSTAT,
- GLUPY_STATFS,
- GLUPY_SETXATTR,
- GLUPY_GETXATTR,
- GLUPY_FSETXATTR,
- GLUPY_FGETXATTR,
- GLUPY_REMOVEXATTR,
- GLUPY_FREMOVEXATTR,
- GLUPY_LINK,
- GLUPY_UNLINK,
- GLUPY_READLINK,
- GLUPY_SYMLINK,
- GLUPY_MKNOD,
- GLUPY_MKDIR,
- GLUPY_RMDIR,
- GLUPY_N_FUNCS
-};
-
-typedef struct {
- PyObject *py_module;
- PyObject *py_xlator;
- long fops[GLUPY_N_FUNCS];
- long cbks[GLUPY_N_FUNCS];
-} glupy_private_t;
-
-enum gf_glupy_mem_types_ {
- gf_glupy_mt_priv = gf_common_mt_end + 1,
- gf_glupy_mt_end
-};
-
-#endif /* __GLUPY_H__ */
diff --git a/xlators/features/glupy/src/glupy.sym b/xlators/features/glupy/src/glupy.sym
deleted file mode 100644
index 55d9a300108..00000000000
--- a/xlators/features/glupy/src/glupy.sym
+++ /dev/null
@@ -1,101 +0,0 @@
-init
-fini
-fops
-cbks
-options
-notify
-mem_acct_init
-reconfigure
-dumpops
-set_lookup_fop
-set_lookup_cbk
-set_create_fop
-set_create_cbk
-set_open_fop
-set_open_cbk
-set_readv_fop
-set_readv_cbk
-set_writev_fop
-set_writev_cbk
-set_opendir_fop
-set_opendir_cbk
-set_readdir_fop
-set_readdir_cbk
-set_readdirp_fop
-set_readdirp_cbk
-set_stat_fop
-set_stat_cbk
-set_fstat_fop
-set_fstat_cbk
-set_statfs_fop
-set_statfs_cbk
-set_setxattr_fop
-set_setxattr_cbk
-set_getxattr_fop
-set_getxattr_cbk
-set_fsetxattr_fop
-set_fsetxattr_cbk
-set_fgetxattr_fop
-set_fgetxattr_cbk
-set_removexattr_fop
-set_removexattr_cbk
-set_fremovexattr_fop
-set_fremovexattr_cbk
-set_link_fop
-set_link_cbk
-set_symlink_fop
-set_symlink_cbk
-set_readlink_fop
-set_readlink_cbk
-set_unlink_fop
-set_unlink_cbk
-set_mkdir_fop
-set_mkdir_cbk
-set_rmdir_fop
-set_rmdir_cbk
-wind_lookup
-wind_create
-wind_open
-wind_readv
-wind_writev
-wind_opendir
-wind_readdir
-wind_readdirp
-wind_stat
-wind_fstat
-wind_statfs
-wind_setxattr
-wind_getxattr
-wind_fsetxattr
-wind_fgetxattr
-wind_removexattr
-wind_fremovexattr
-wind_link
-wind_symlink
-wind_readlink
-wind_unlink
-wind_mkdir
-wind_rmdir
-unwind_lookup
-unwind_create
-unwind_open
-unwind_readv
-unwind_writev
-unwind_opendir
-unwind_readdir
-unwind_readdirp
-unwind_stat
-unwind_fstat
-unwind_statfs
-unwind_setxattr
-unwind_getxattr
-unwind_fsetxattr
-unwind_fgetxattr
-unwind_removexattr
-unwind_fremovexattr
-unwind_link
-unwind_symlink
-unwind_readlink
-unwind_unlink
-unwind_mkdir
-unwind_rmdir
diff --git a/xlators/features/glupy/src/glupy/Makefile.am b/xlators/features/glupy/src/glupy/Makefile.am
deleted file mode 100644
index 573d2da12e1..00000000000
--- a/xlators/features/glupy/src/glupy/Makefile.am
+++ /dev/null
@@ -1,5 +0,0 @@
-# Install __init__.py into the Python site-packages area
-pyglupydir = @BUILD_PYTHON_SITE_PACKAGES@/gluster/glupy
-pyglupy_PYTHON = __init__.py
-
-CLEANFILES =
diff --git a/xlators/features/glupy/src/glupy/__init__.py b/xlators/features/glupy/src/glupy/__init__.py
deleted file mode 100644
index 576fbdb9945..00000000000
--- a/xlators/features/glupy/src/glupy/__init__.py
+++ /dev/null
@@ -1,852 +0,0 @@
-##
-## Copyright (c) 2006-2014 Red Hat, Inc. <http://www.redhat.com>
-## This file is part of GlusterFS.
-##
-## This file is licensed to you under your choice of the GNU Lesser
-## General Public License, version 3 or any later version (LGPLv3 or
-## later), or the GNU General Public License, version 2 (GPLv2), in all
-## cases as published by the Free Software Foundation.
-##
-
-import sys
-import os
-from ctypes import *
-
-dl = CDLL(os.getenv("PATH_GLUSTERFS_GLUPY_MODULE", ""), RTLD_GLOBAL)
-
-
-class call_frame_t (Structure):
- pass
-
-class dev_t (Structure):
- pass
-
-
-class dict_t (Structure):
- pass
-
-
-class gf_dirent_t (Structure):
- pass
-
-
-class iobref_t (Structure):
- pass
-
-
-class iovec_t (Structure):
- pass
-
-
-class list_head (Structure):
- pass
-
-list_head._fields_ = [
- ("next", POINTER(list_head)),
- ("prev", POINTER(list_head))
- ]
-
-
-class rwxperm_t (Structure):
- _fields_ = [
- ("read", c_uint8, 1),
- ("write", c_uint8, 1),
- ("execn", c_uint8, 1)
- ]
-
-
-class statvfs_t (Structure):
- pass
-
-
-class xlator_t (Structure):
- pass
-
-
-class ia_prot_t (Structure):
- _fields_ = [
- ("suid", c_uint8, 1),
- ("sgid", c_uint8, 1),
- ("sticky", c_uint8, 1),
- ("owner", rwxperm_t),
- ("group", rwxperm_t),
- ("other", rwxperm_t)
- ]
-
-# For checking file type.
-(IA_INVAL, IA_IFREG, IA_IFDIR, IA_IFLNK, IA_IFBLK, IA_IFCHR, IA_IFIFO,
- IA_IFSOCK) = range(8)
-
-
-class iatt_t (Structure):
- _fields_ = [
- ("ia_no", c_uint64),
- ("ia_gfid", c_ubyte * 16),
- ("ia_dev", c_uint64),
- ("ia_type", c_uint),
- ("ia_prot", ia_prot_t),
- ("ia_nlink", c_uint32),
- ("ia_uid", c_uint32),
- ("ia_gid", c_uint32),
- ("ia_rdev", c_uint64),
- ("ia_size", c_uint64),
- ("ia_blksize", c_uint32),
- ("ia_blocks", c_uint64),
- ("ia_atime", c_uint32 ),
- ("ia_atime_nsec", c_uint32),
- ("ia_mtime", c_uint32),
- ("ia_mtime_nsec", c_uint32),
- ("ia_ctime", c_uint32),
- ("ia_ctime_nsec", c_uint32)
- ]
-
-
-class mem_pool (Structure):
- _fields_ = [
- ("list", list_head),
- ("hot_count", c_int),
- ("cold_count", c_int),
- ("lock", c_void_p),
- ("padded_sizeof_type", c_ulong),
- ("pool", c_void_p),
- ("pool_end", c_void_p),
- ("real_sizeof_type", c_int),
- ("alloc_count", c_uint64),
- ("pool_misses", c_uint64),
- ("max_alloc", c_int),
- ("curr_stdalloc", c_int),
- ("max_stdalloc", c_int),
- ("name", c_char_p),
- ("global_list", list_head)
- ]
-
-
-class U_ctx_key_inode (Union):
- _fields_ = [
- ("key", c_uint64),
- ("xl_key", POINTER(xlator_t))
- ]
-
-
-class U_ctx_value1 (Union):
- _fields_ = [
- ("value1", c_uint64),
- ("ptr1", c_void_p)
- ]
-
-
-class U_ctx_value2 (Union):
- _fields_ = [
- ("value2", c_uint64),
- ("ptr2", c_void_p)
- ]
-
-class inode_ctx (Structure):
- _anonymous_ = ("u_key", "u_value1", "u_value2",)
- _fields_ = [
- ("u_key", U_ctx_key_inode),
- ("u_value1", U_ctx_value1),
- ("u_value2", U_ctx_value2)
- ]
-
-class inode_t (Structure):
- pass
-
-class inode_table_t (Structure):
- _fields_ = [
- ("lock", c_void_p),
- ("hashsize", c_size_t),
- ("name", c_char_p),
- ("root", POINTER(inode_t)),
- ("xl", POINTER(xlator_t)),
- ("lru_limit", c_uint32),
- ("inode_hash", POINTER(list_head)),
- ("name_hash", POINTER(list_head)),
- ("active", list_head),
- ("active_size", c_uint32),
- ("lru", list_head),
- ("lru_size", c_uint32),
- ("purge", list_head),
- ("purge_size", c_uint32),
- ("inode_pool", POINTER(mem_pool)),
- ("dentry_pool", POINTER(mem_pool)),
- ("fd_mem_pool", POINTER(mem_pool))
- ]
-
-inode_t._fields_ = [
- ("table", POINTER(inode_table_t)),
- ("gfid", c_ubyte * 16),
- ("lock", c_void_p),
- ("nlookup", c_uint64),
- ("fd_count", c_uint32),
- ("ref", c_uint32),
- ("ia_type", c_uint),
- ("fd_list", list_head),
- ("dentry_list", list_head),
- ("hashv", list_head),
- ("listv", list_head),
- ("ctx", POINTER(inode_ctx))
- ]
-
-
-
-class U_ctx_key_fd (Union):
- _fields_ = [
- ("key", c_uint64),
- ("xl_key", c_void_p)
- ]
-
-class fd_lk_ctx (Structure):
- _fields_ = [
- ("lk_list", list_head),
- ("ref", c_int),
- ("lock", c_void_p)
- ]
-
-class fd_ctx (Structure):
- _anonymous_ = ("u_key", "u_value1")
- _fields_ = [
- ("u_key", U_ctx_key_fd),
- ("u_value1", U_ctx_value1)
- ]
-
-class fd_t (Structure):
- _fields_ = [
- ("pid", c_uint64),
- ("flags", c_int32),
- ("refcount", c_int32),
- ("inode_list", list_head),
- ("inode", POINTER(inode_t)),
- ("lock", c_void_p),
- ("ctx", POINTER(fd_ctx)),
- ("xl_count", c_int),
- ("lk_ctx", POINTER(fd_lk_ctx)),
- ("anonymous", c_uint)
- ]
-
-class loc_t (Structure):
- _fields_ = [
- ("path", c_char_p),
- ("name", c_char_p),
- ("inode", POINTER(inode_t)),
- ("parent", POINTER(inode_t)),
- ("gfid", c_ubyte * 16),
- ("pargfid", c_ubyte * 16),
- ]
-
-
-
-def _init_op (a_class, fop, cbk, wind, unwind):
- # Decorators, used by translators. We could pass the signatures as
- # parameters, but it's actually kind of nice to keep them around for
- # inspection.
- a_class.fop_type = CFUNCTYPE(*a_class.fop_sig)
- a_class.cbk_type = CFUNCTYPE(*a_class.cbk_sig)
- # Dispatch-function registration.
- fop.restype = None
- fop.argtypes = [ c_long, a_class.fop_type ]
- # Callback-function registration.
- cbk.restype = None
- cbk.argtypes = [ c_long, a_class.cbk_type ]
- # STACK_WIND function.
- wind.restype = None
- wind.argtypes = list(a_class.fop_sig[1:])
- # STACK_UNWIND function.
- unwind.restype = None
- unwind.argtypes = list(a_class.cbk_sig[1:])
-
-class OpLookup:
- fop_sig = (c_int, POINTER(call_frame_t), POINTER(xlator_t),
- POINTER(loc_t), POINTER(dict_t))
- cbk_sig = (c_int, POINTER(call_frame_t), c_long, POINTER(xlator_t),
- c_int, c_int, POINTER(inode_t), POINTER(iatt_t),
- POINTER(dict_t), POINTER(iatt_t))
-_init_op (OpLookup, dl.set_lookup_fop, dl.set_lookup_cbk,
- dl.wind_lookup, dl.unwind_lookup)
-
-class OpCreate:
- fop_sig = (c_int, POINTER(call_frame_t), POINTER(xlator_t),
- POINTER(loc_t), c_int, c_uint, c_uint, POINTER(fd_t),
- POINTER(dict_t))
- cbk_sig = (c_int, POINTER(call_frame_t), c_long, POINTER(xlator_t),
- c_int, c_int, POINTER(fd_t), POINTER(inode_t),
- POINTER(iatt_t), POINTER(iatt_t), POINTER(iatt_t),
- POINTER(dict_t))
-_init_op (OpCreate, dl.set_create_fop, dl.set_create_cbk,
- dl.wind_create, dl.unwind_create)
-
-class OpOpen:
- fop_sig = (c_int, POINTER(call_frame_t), POINTER(xlator_t),
- POINTER(loc_t), c_int, POINTER(fd_t), POINTER(dict_t))
- cbk_sig = (c_int, POINTER(call_frame_t), c_long, POINTER(xlator_t),
- c_int, c_int, POINTER(fd_t), POINTER(dict_t))
-_init_op (OpOpen, dl.set_open_fop, dl.set_open_cbk,
- dl.wind_open, dl.unwind_open)
-
-class OpReadv:
- fop_sig = (c_int, POINTER(call_frame_t), POINTER(xlator_t),
- POINTER(fd_t), c_size_t, c_long, c_uint32, POINTER(dict_t))
- cbk_sig = (c_int, POINTER(call_frame_t), c_long, POINTER(xlator_t),
- c_int, c_int, POINTER(iovec_t), c_int, POINTER(iatt_t),
- POINTER(iobref_t), POINTER(dict_t))
-_init_op (OpReadv, dl.set_readv_fop, dl.set_readv_cbk,
- dl.wind_readv, dl.unwind_readv)
-class OpWritev:
- fop_sig = (c_int, POINTER(call_frame_t), POINTER(xlator_t),
- POINTER(fd_t), POINTER(iovec_t), c_int, c_long, c_uint32,
- POINTER(iobref_t), POINTER(dict_t))
- cbk_sig = (c_int, POINTER(call_frame_t), c_long, POINTER(xlator_t),
- c_int, c_int, POINTER(iatt_t), POINTER(iatt_t),
- POINTER(dict_t))
-_init_op (OpWritev, dl.set_writev_fop, dl.set_writev_cbk,
- dl.wind_writev, dl.unwind_writev)
-
-class OpOpendir:
- fop_sig = (c_int, POINTER(call_frame_t), POINTER(xlator_t),
- POINTER(loc_t), POINTER(fd_t), POINTER(dict_t))
- cbk_sig = (c_int, POINTER(call_frame_t), c_long, POINTER(xlator_t),
- c_int, c_int, POINTER(fd_t), POINTER(dict_t))
-_init_op (OpOpendir, dl.set_opendir_fop, dl.set_opendir_cbk,
- dl.wind_opendir, dl.unwind_opendir)
-
-class OpReaddir:
- fop_sig = (c_int, POINTER(call_frame_t), POINTER(xlator_t),
- POINTER(fd_t), c_size_t, c_long, POINTER(dict_t))
- cbk_sig = (c_int, POINTER(call_frame_t), c_long, POINTER(xlator_t),
- c_int, c_int, POINTER(gf_dirent_t), POINTER(dict_t))
-_init_op (OpReaddir, dl.set_readdir_fop, dl.set_readdir_cbk,
- dl.wind_readdir, dl.unwind_readdir)
-
-class OpReaddirp:
- fop_sig = (c_int, POINTER(call_frame_t), POINTER(xlator_t),
- POINTER(fd_t), c_size_t, c_long, POINTER(dict_t))
- cbk_sig = (c_int, POINTER(call_frame_t), c_long, POINTER(xlator_t),
- c_int, c_int, POINTER(gf_dirent_t), POINTER(dict_t))
-_init_op (OpReaddirp, dl.set_readdirp_fop, dl.set_readdirp_cbk,
- dl.wind_readdirp, dl.unwind_readdirp)
-
-class OpStat:
- fop_sig = (c_int, POINTER(call_frame_t), POINTER(xlator_t),
- POINTER(loc_t), POINTER(dict_t))
- cbk_sig = (c_int, POINTER(call_frame_t), c_long, POINTER(xlator_t),
- c_int, c_int, POINTER(iatt_t), POINTER(dict_t))
-_init_op (OpStat, dl.set_stat_fop, dl.set_stat_cbk,
- dl.wind_stat, dl.unwind_stat)
-
-class OpFstat:
- fop_sig = (c_int, POINTER(call_frame_t), POINTER(xlator_t),
- POINTER(fd_t), POINTER(dict_t))
- cbk_sig = (c_int, POINTER(call_frame_t), c_long, POINTER(xlator_t),
- c_int, c_int, POINTER(iatt_t), POINTER(dict_t))
-_init_op (OpFstat, dl.set_fstat_fop, dl.set_fstat_cbk,
- dl.wind_fstat, dl.unwind_fstat)
-
-class OpStatfs:
- fop_sig = (c_int, POINTER(call_frame_t), POINTER(xlator_t),
- POINTER(loc_t), POINTER(dict_t))
- cbk_sig = (c_int, POINTER(call_frame_t), c_long, POINTER(xlator_t),
- c_int, c_int, POINTER(statvfs_t), POINTER(dict_t))
-_init_op (OpStatfs, dl.set_statfs_fop, dl.set_statfs_cbk,
- dl.wind_statfs, dl.unwind_statfs)
-
-
-class OpSetxattr:
- fop_sig = (c_int, POINTER(call_frame_t), POINTER(xlator_t),
- POINTER(loc_t), POINTER(dict_t), c_int32,
- POINTER (dict_t))
- cbk_sig = (c_int, POINTER(call_frame_t), c_long, POINTER(xlator_t),
- c_int, c_int, POINTER(dict_t))
-_init_op (OpSetxattr, dl.set_setxattr_fop, dl.set_setxattr_cbk,
- dl.wind_setxattr, dl.unwind_setxattr)
-
-class OpGetxattr:
- fop_sig = (c_int, POINTER(call_frame_t), POINTER(xlator_t),
- POINTER(loc_t), c_char_p, POINTER (dict_t))
- cbk_sig = (c_int, POINTER(call_frame_t), c_long, POINTER(xlator_t),
- c_int, c_int, POINTER(dict_t), POINTER(dict_t))
-_init_op (OpGetxattr, dl.set_getxattr_fop, dl.set_getxattr_cbk,
- dl.wind_getxattr, dl.unwind_getxattr)
-
-class OpFsetxattr:
- fop_sig = (c_int, POINTER(call_frame_t), POINTER(xlator_t),
- POINTER(fd_t), POINTER(dict_t), c_int32,
- POINTER (dict_t))
- cbk_sig = (c_int, POINTER(call_frame_t), c_long, POINTER(xlator_t),
- c_int, c_int, POINTER(dict_t))
-_init_op (OpFsetxattr, dl.set_fsetxattr_fop, dl.set_fsetxattr_cbk,
- dl.wind_fsetxattr, dl.unwind_fsetxattr)
-
-class OpFgetxattr:
- fop_sig = (c_int, POINTER(call_frame_t), POINTER(xlator_t),
- POINTER(fd_t), c_char_p, POINTER (dict_t))
- cbk_sig = (c_int, POINTER(call_frame_t), c_long, POINTER(xlator_t),
- c_int, c_int, POINTER(dict_t), POINTER(dict_t))
-_init_op (OpFgetxattr, dl.set_fgetxattr_fop, dl.set_fgetxattr_cbk,
- dl.wind_fgetxattr, dl.unwind_fgetxattr)
-
-class OpRemovexattr:
- fop_sig = (c_int, POINTER(call_frame_t), POINTER(xlator_t),
- POINTER(loc_t), c_char_p, POINTER(dict_t))
- cbk_sig = (c_int, POINTER(call_frame_t), c_long, POINTER(xlator_t),
- c_int, c_int, POINTER(dict_t))
-_init_op (OpRemovexattr, dl.set_removexattr_fop, dl.set_removexattr_cbk,
- dl.wind_removexattr, dl.unwind_removexattr)
-
-
-class OpFremovexattr:
- fop_sig = (c_int, POINTER(call_frame_t), POINTER(xlator_t),
- POINTER(fd_t), c_char_p, POINTER(dict_t))
- cbk_sig = (c_int, POINTER(call_frame_t), c_long, POINTER(xlator_t),
- c_int, c_int, POINTER(dict_t))
-_init_op (OpFremovexattr, dl.set_fremovexattr_fop, dl.set_fremovexattr_cbk,
- dl.wind_fremovexattr, dl.unwind_fremovexattr)
-
-class OpLink:
- fop_sig = (c_int, POINTER(call_frame_t), POINTER(xlator_t),
- POINTER(loc_t), POINTER(loc_t), POINTER(dict_t))
- cbk_sig = (c_int, POINTER(call_frame_t), c_long, POINTER(xlator_t),
- c_int, c_int, POINTER(inode_t), POINTER(iatt_t),
- POINTER(iatt_t), POINTER(iatt_t), POINTER(dict_t))
-_init_op (OpLink, dl.set_link_fop, dl.set_link_cbk,
- dl.wind_link, dl.unwind_link)
-
-class OpSymlink:
- fop_sig = (c_int, POINTER(call_frame_t), POINTER(xlator_t),
- c_char_p, POINTER(loc_t), c_uint, POINTER(dict_t))
- cbk_sig = (c_int, POINTER(call_frame_t), c_long, POINTER(xlator_t),
- c_int, c_int, POINTER(inode_t), POINTER(iatt_t),
- POINTER(iatt_t), POINTER(iatt_t), POINTER(dict_t))
-_init_op (OpSymlink, dl.set_symlink_fop, dl.set_symlink_cbk,
- dl.wind_symlink, dl.unwind_symlink)
-
-class OpUnlink:
- fop_sig = (c_int, POINTER(call_frame_t), POINTER(xlator_t),
- POINTER(loc_t), c_int, POINTER(dict_t))
- cbk_sig = (c_int, POINTER(call_frame_t), c_long, POINTER(xlator_t),
- c_int, c_int, POINTER(iatt_t), POINTER(iatt_t),
- POINTER(dict_t))
-_init_op (OpUnlink, dl.set_unlink_fop, dl.set_unlink_cbk,
- dl.wind_unlink, dl.unwind_unlink)
-
-class OpReadlink:
- fop_sig = (c_int, POINTER(call_frame_t), POINTER(xlator_t),
- POINTER(loc_t), c_size_t, POINTER(dict_t))
- cbk_sig = (c_int, POINTER(call_frame_t), c_long, POINTER(xlator_t),
- c_int, c_int, c_char_p, POINTER(iatt_t), POINTER(dict_t))
-_init_op (OpReadlink, dl.set_readlink_fop, dl.set_readlink_cbk,
- dl.wind_readlink, dl.unwind_readlink)
-
-class OpMkdir:
- fop_sig = (c_int, POINTER(call_frame_t), POINTER(xlator_t),
- POINTER(loc_t), c_uint, c_uint, POINTER(dict_t))
- cbk_sig = (c_int, POINTER(call_frame_t), c_long, POINTER(xlator_t),
- c_int, c_int, POINTER(inode_t), POINTER(iatt_t),
- POINTER(iatt_t), POINTER(iatt_t), POINTER(dict_t))
-_init_op (OpMkdir, dl.set_mkdir_fop, dl.set_mkdir_cbk,
- dl.wind_mkdir, dl.unwind_mkdir)
-
-class OpRmdir:
- fop_sig = (c_int, POINTER(call_frame_t), POINTER(xlator_t),
- POINTER(loc_t), c_int, POINTER(dict_t))
- cbk_sig = (c_int, POINTER(call_frame_t), c_long, POINTER(xlator_t),
- c_int, c_int, POINTER(iatt_t), POINTER(iatt_t),
- POINTER(dict_t))
-_init_op (OpRmdir, dl.set_rmdir_fop, dl.set_rmdir_cbk,
- dl.wind_rmdir, dl.unwind_rmdir)
-
-
-class Translator:
- def __init__ (self, c_this):
- # This is only here to keep references to the stubs we create,
- # because ctypes doesn't and glupy.so can't because it doesn't
- # get a pointer to the actual Python object. It's a dictionary
- # instead of a list in case we ever allow changing fops/cbks
- # after initialization and need to look them up.
- self.stub_refs = {}
- funcs = dir(self.__class__)
- if "lookup_fop" in funcs:
- @OpLookup.fop_type
- def stub (frame, this, loc, xdata, s=self):
- return s.lookup_fop (frame, this, loc, xdata)
- self.stub_refs["lookup_fop"] = stub
- dl.set_lookup_fop(c_this, stub)
- if "lookup_cbk" in funcs:
- @OpLookup.cbk_type
- def stub (frame, cookie, this, op_ret, op_errno, inode,
- buf, xdata, postparent, s=self):
- return s.lookup_cbk(frame, cookie, this, op_ret,
- op_errno, inode, buf, xdata,
- postparent)
- self.stub_refs["lookup_cbk"] = stub
- dl.set_lookup_cbk(c_this, stub)
- if "create_fop" in funcs:
- @OpCreate.fop_type
- def stub (frame, this, loc, flags, mode, umask, fd,
- xdata, s=self):
- return s.create_fop (frame, this, loc, flags,
- mode, umask, fd, xdata)
- self.stub_refs["create_fop"] = stub
- dl.set_create_fop(c_this, stub)
- if "create_cbk" in funcs:
- @OpCreate.cbk_type
- def stub (frame, cookie, this, op_ret, op_errno, fd,
- inode, buf, preparent, postparent, xdata,
- s=self):
- return s.create_cbk (frame, cookie, this,
- op_ret, op_errno, fd,
- inode, buf, preparent,
- postparent, xdata)
- self.stub_refs["create_cbk"] = stub
- dl.set_create_cbk(c_this, stub)
- if "open_fop" in funcs:
- @OpOpen.fop_type
- def stub (frame, this, loc, flags, fd,
- xdata, s=self):
- return s.open_fop (frame, this, loc, flags,
- fd, xdata)
- self.stub_refs["open_fop"] = stub
- dl.set_open_fop(c_this, stub)
- if "open_cbk" in funcs:
- @OpOpen.cbk_type
- def stub (frame, cookie, this, op_ret, op_errno, fd,
- xdata, s=self):
- return s.open_cbk (frame, cookie, this,
- op_ret, op_errno, fd,
- xdata)
- self.stub_refs["open_cbk"] = stub
- dl.set_open_cbk(c_this, stub)
- if "readv_fop" in funcs:
- @OpReadv.fop_type
- def stub (frame, this, fd, size, offset, flags,
- xdata, s=self):
- return s.readv_fop (frame, this, fd, size,
- offset, flags, xdata)
- self.stub_refs["readv_fop"] = stub
- dl.set_readv_fop(c_this, stub)
- if "readv_cbk" in funcs:
- @OpReadv.cbk_type
- def stub (frame, cookie, this, op_ret, op_errno,
- vector, count, stbuf, iobref, xdata,
- s=self):
- return s.readv_cbk (frame, cookie, this,
- op_ret, op_errno, vector,
- count, stbuf, iobref,
- xdata)
- self.stub_refs["readv_cbk"] = stub
- dl.set_readv_cbk(c_this, stub)
- if "writev_fop" in funcs:
- @OpWritev.fop_type
- def stub (frame, this, fd, vector, count,
- offset, flags, iobref, xdata, s=self):
- return s.writev_fop (frame, this, fd, vector,
- count, offset, flags,
- iobref, xdata)
- self.stub_refs["writev_fop"] = stub
- dl.set_writev_fop(c_this, stub)
- if "writev_cbk" in funcs:
- @OpWritev.cbk_type
- def stub (frame, cookie, this, op_ret, op_errno,
- prebuf, postbuf, xdata, s=self):
- return s.writev_cbk (frame, cookie, this,
- op_ret, op_errno, prebuf,
- postbuf, xdata)
- self.stub_refs["writev_cbk"] = stub
- dl.set_writev_cbk(c_this, stub)
- if "opendir_fop" in funcs:
- @OpOpendir.fop_type
- def stub (frame, this, loc, fd, xdata, s=self):
- return s.opendir_fop (frame, this, loc, fd,
- xdata)
- self.stub_refs["opendir_fop"] = stub
- dl.set_opendir_fop(c_this, stub)
- if "opendir_cbk" in funcs:
- @OpOpendir.cbk_type
- def stub (frame, cookie, this, op_ret, op_errno, fd,
- xdata, s=self):
- return s.opendir_cbk(frame, cookie, this,
- op_ret, op_errno, fd,
- xdata)
- self.stub_refs["opendir_cbk"] = stub
- dl.set_opendir_cbk(c_this, stub)
- if "readdir_fop" in funcs:
- @OpReaddir.fop_type
- def stub (frame, this, fd, size, offset, xdata, s=self):
- return s.readdir_fop (frame, this, fd, size,
- offset, xdata)
- self.stub_refs["readdir_fop"] = stub
- dl.set_readdir_fop(c_this, stub)
- if "readdir_cbk" in funcs:
- @OpReaddir.cbk_type
- def stub (frame, cookie, this, op_ret, op_errno,
- entries, xdata, s=self):
- return s.readdir_cbk(frame, cookie, this,
- op_ret, op_errno, entries,
- xdata)
- self.stub_refs["readdir_cbk"] = stub
- dl.set_readdir_cbk(c_this, stub)
- if "readdirp_fop" in funcs:
- @OpReaddirp.fop_type
- def stub (frame, this, fd, size, offset, xdata, s=self):
- return s.readdirp_fop (frame, this, fd, size,
- offset, xdata)
- self.stub_refs["readdirp_fop"] = stub
- dl.set_readdirp_fop(c_this, stub)
- if "readdirp_cbk" in funcs:
- @OpReaddirp.cbk_type
- def stub (frame, cookie, this, op_ret, op_errno,
- entries, xdata, s=self):
- return s.readdirp_cbk (frame, cookie, this,
- op_ret, op_errno,
- entries, xdata)
- self.stub_refs["readdirp_cbk"] = stub
- dl.set_readdirp_cbk(c_this, stub)
- if "stat_fop" in funcs:
- @OpStat.fop_type
- def stub (frame, this, loc, xdata, s=self):
- return s.stat_fop (frame, this, loc, xdata)
- self.stub_refs["stat_fop"] = stub
- dl.set_stat_fop(c_this, stub)
- if "stat_cbk" in funcs:
- @OpStat.cbk_type
- def stub (frame, cookie, this, op_ret, op_errno, buf,
- xdata, s=self):
- return s.stat_cbk(frame, cookie, this, op_ret,
- op_errno, buf, xdata)
- self.stub_refs["stat_cbk"] = stub
- dl.set_stat_cbk(c_this, stub)
- if "fstat_fop" in funcs:
- @OpFstat.fop_type
- def stub (frame, this, fd, xdata, s=self):
- return s.fstat_fop (frame, this, fd, xdata)
- self.stub_refs["fstat_fop"] = stub
- dl.set_fstat_fop(c_this, stub)
- if "fstat_cbk" in funcs:
- @OpFstat.cbk_type
- def stub (frame, cookie, this, op_ret, op_errno, buf,
- xdata, s=self):
- return s.fstat_cbk(frame, cookie, this, op_ret,
- op_errno, buf, xdata)
- self.stub_refs["fstat_cbk"] = stub
- dl.set_fstat_cbk(c_this, stub)
- if "statfs_fop" in funcs:
- @OpStatfs.fop_type
- def stub (frame, this, loc, xdata, s=self):
- return s.statfs_fop (frame, this, loc, xdata)
- self.stub_refs["statfs_fop"] = stub
- dl.set_statfs_fop(c_this, stub)
- if "statfs_cbk" in funcs:
- @OpStatfs.cbk_type
- def stub (frame, cookie, this, op_ret, op_errno, buf,
- xdata, s=self):
- return s.statfs_cbk (frame, cookie, this,
- op_ret, op_errno, buf,
- xdata)
- self.stub_refs["statfs_cbk"] = stub
- dl.set_statfs_cbk(c_this, stub)
- if "setxattr_fop" in funcs:
- @OpSetxattr.fop_type
- def stub (frame, this, loc, dictionary, flags, xdata,
- s=self):
- return s.setxattr_fop (frame, this, loc,
- dictionary, flags,
- xdata)
- self.stub_refs["setxattr_fop"] = stub
- dl.set_setxattr_fop(c_this, stub)
- if "setxattr_cbk" in funcs:
- @OpSetxattr.cbk_type
- def stub (frame, cookie, this, op_ret, op_errno, xdata,
- s=self):
- return s.setxattr_cbk(frame, cookie, this,
- op_ret, op_errno, xdata)
- self.stub_refs["setxattr_cbk"] = stub
- dl.set_setxattr_cbk(c_this, stub)
- if "getxattr_fop" in funcs:
- @OpGetxattr.fop_type
- def stub (frame, this, loc, name, xdata, s=self):
- return s.getxattr_fop (frame, this, loc, name,
- xdata)
- self.stub_refs["getxattr_fop"] = stub
- dl.set_getxattr_fop(c_this, stub)
- if "getxattr_cbk" in funcs:
- @OpGetxattr.cbk_type
- def stub (frame, cookie, this, op_ret, op_errno,
- dictionary, xdata, s=self):
- return s.getxattr_cbk(frame, cookie, this,
- op_ret, op_errno,
- dictionary, xdata)
- self.stub_refs["getxattr_cbk"] = stub
- dl.set_getxattr_cbk(c_this, stub)
- if "fsetxattr_fop" in funcs:
- @OpFsetxattr.fop_type
- def stub (frame, this, fd, dictionary, flags, xdata,
- s=self):
- return s.fsetxattr_fop (frame, this, fd,
- dictionary, flags,
- xdata)
- self.stub_refs["fsetxattr_fop"] = stub
- dl.set_fsetxattr_fop(c_this, stub)
- if "fsetxattr_cbk" in funcs:
- @OpFsetxattr.cbk_type
- def stub (frame, cookie, this, op_ret, op_errno, xdata,
- s=self):
- return s.fsetxattr_cbk(frame, cookie, this,
- op_ret, op_errno, xdata)
- self.stub_refs["fsetxattr_cbk"] = stub
- dl.set_fsetxattr_cbk(c_this, stub)
- if "fgetxattr_fop" in funcs:
- @OpFgetxattr.fop_type
- def stub (frame, this, fd, name, xdata, s=self):
- return s.fgetxattr_fop (frame, this, fd, name,
- xdata)
- self.stub_refs["fgetxattr_fop"] = stub
- dl.set_fgetxattr_fop(c_this, stub)
- if "fgetxattr_cbk" in funcs:
- @OpFgetxattr.cbk_type
- def stub (frame, cookie, this, op_ret, op_errno,
- dictionary, xdata, s=self):
- return s.fgetxattr_cbk(frame, cookie, this,
- op_ret, op_errno,
- dictionary, xdata)
- self.stub_refs["fgetxattr_cbk"] = stub
- dl.set_fgetxattr_cbk(c_this, stub)
- if "removexattr_fop" in funcs:
- @OpRemovexattr.fop_type
- def stub (frame, this, loc, name, xdata, s=self):
- return s.removexattr_fop (frame, this, loc,
- name, xdata)
- self.stub_refs["removexattr_fop"] = stub
- dl.set_removexattr_fop(c_this, stub)
- if "removexattr_cbk" in funcs:
- @OpRemovexattr.cbk_type
- def stub (frame, cookie, this, op_ret, op_errno,
- xdata, s=self):
- return s.removexattr_cbk(frame, cookie, this,
- op_ret, op_errno,
- xdata)
- self.stub_refs["removexattr_cbk"] = stub
- dl.set_removexattr_cbk(c_this, stub)
- if "fremovexattr_fop" in funcs:
- @OpFremovexattr.fop_type
- def stub (frame, this, fd, name, xdata, s=self):
- return s.fremovexattr_fop (frame, this, fd,
- name, xdata)
- self.stub_refs["fremovexattr_fop"] = stub
- dl.set_fremovexattr_fop(c_this, stub)
- if "fremovexattr_cbk" in funcs:
- @OpFremovexattr.cbk_type
- def stub (frame, cookie, this, op_ret, op_errno,
- xdata, s=self):
- return s.fremovexattr_cbk(frame, cookie, this,
- op_ret, op_errno,
- xdata)
- self.stub_refs["fremovexattr_cbk"] = stub
- dl.set_fremovexattr_cbk(c_this, stub)
- if "link_fop" in funcs:
- @OpLink.fop_type
- def stub (frame, this, oldloc, newloc,
- xdata, s=self):
- return s.link_fop (frame, this, oldloc,
- newloc, xdata)
- self.stub_refs["link_fop"] = stub
- dl.set_link_fop(c_this, stub)
- if "link_cbk" in funcs:
- @OpLink.cbk_type
- def stub (frame, cookie, this, op_ret, op_errno,
- inode, buf, preparent, postparent, xdata,
- s=self):
- return s.link_cbk (frame, cookie, this,
- op_ret, op_errno, inode,
- buf, preparent,
- postparent, xdata)
- self.stub_refs["link_cbk"] = stub
- dl.set_link_cbk(c_this, stub)
- if "symlink_fop" in funcs:
- @OpSymlink.fop_type
- def stub (frame, this, linkname, loc,
- umask, xdata, s=self):
- return s.symlink_fop (frame, this, linkname,
- loc, umask, xdata)
- self.stub_refs["symlink_fop"] = stub
- dl.set_symlink_fop(c_this, stub)
- if "symlink_cbk" in funcs:
- @OpSymlink.cbk_type
- def stub (frame, cookie, this, op_ret, op_errno,
- inode, buf, preparent, postparent, xdata,
- s=self):
- return s.symlink_cbk (frame, cookie, this,
- op_ret, op_errno, inode,
- buf, preparent,
- postparent, xdata)
- self.stub_refs["symlink_cbk"] = stub
- dl.set_symlink_cbk(c_this, stub)
- if "unlink_fop" in funcs:
- @OpUnlink.fop_type
- def stub (frame, this, loc, xflags,
- xdata, s=self):
- return s.unlink_fop (frame, this, loc,
- xflags, xdata)
- self.stub_refs["unlink_fop"] = stub
- dl.set_unlink_fop(c_this, stub)
- if "unlink_cbk" in funcs:
- @OpUnlink.cbk_type
- def stub (frame, cookie, this, op_ret, op_errno,
- preparent, postparent, xdata, s=self):
- return s.unlink_cbk (frame, cookie, this,
- op_ret, op_errno,
- preparent, postparent,
- xdata)
- self.stub_refs["unlink_cbk"] = stub
- dl.set_unlink_cbk(c_this, stub)
- if "readlink_fop" in funcs:
- @OpReadlink.fop_type
- def stub (frame, this, loc, size,
- xdata, s=self):
- return s.readlink_fop (frame, this, loc,
- size, xdata)
- self.stub_refs["readlink_fop"] = stub
- dl.set_readlink_fop(c_this, stub)
- if "readlink_cbk" in funcs:
- @OpReadlink.cbk_type
- def stub (frame, cookie, this, op_ret, op_errno,
- path, buf, xdata, s=self):
- return s.readlink_cbk (frame, cookie, this,
- op_ret, op_errno,
- path, buf, xdata)
- self.stub_refs["readlink_cbk"] = stub
- dl.set_readlink_cbk(c_this, stub)
- if "mkdir_fop" in funcs:
- @OpMkdir.fop_type
- def stub (frame, this, loc, mode, umask, xdata,
- s=self):
- return s.mkdir_fop (frame, this, loc, mode,
- umask, xdata)
- self.stub_refs["mkdir_fop"] = stub
- dl.set_mkdir_fop(c_this, stub)
- if "mkdir_cbk" in funcs:
- @OpMkdir.cbk_type
- def stub (frame, cookie, this, op_ret, op_errno, inode,
- buf, preparent, postparent, xdata, s=self):
- return s.mkdir_cbk (frame, cookie, this,
- op_ret, op_errno, inode,
- buf, preparent,
- postparent, xdata)
- self.stub_refs["mkdir_cbk"] = stub
- dl.set_mkdir_cbk(c_this, stub)
- if "rmdir_fop" in funcs:
- @OpRmdir.fop_type
- def stub (frame, this, loc, xflags,
- xdata, s=self):
- return s.rmdir_fop (frame, this, loc,
- xflags, xdata)
- self.stub_refs["rmdir_fop"] = stub
- dl.set_rmdir_fop(c_this, stub)
- if "rmdir_cbk" in funcs:
- @OpRmdir.cbk_type
- def stub (frame, cookie, this, op_ret, op_errno,
- preparent, postparent, xdata, s=self):
- return s.rmdir_cbk (frame, cookie, this,
- op_ret, op_errno,
- preparent, postparent,
- xdata)
- self.stub_refs["rmdir_cbk"] = stub
- dl.set_rmdir_cbk(c_this, stub)
diff --git a/xlators/features/glupy/src/setup.py.in b/xlators/features/glupy/src/setup.py.in
deleted file mode 100644
index 611e9695f76..00000000000
--- a/xlators/features/glupy/src/setup.py.in
+++ /dev/null
@@ -1,24 +0,0 @@
-from distutils.core import setup
-
-DESC = """GlusterFS is a distributed file-system capable of scaling to
-several petabytes. It aggregates various storage bricks over Infiniband
-RDMA or TCP/IP interconnect into one large parallel network file system.
-GlusterFS is one of the most sophisticated file systems in terms of
-features and extensibility. It borrows a powerful concept called
-Translators from GNU Hurd kernel. Much of the code in GlusterFS is in
-user space and easily manageable.
-
-This package contains Glupy, the Python translator interface for GlusterFS."""
-
-setup(
- name='glusterfs-glupy',
- version='@PACKAGE_VERSION@',
- description='Glupy is the Python translator interface for GlusterFS',
- long_description=DESC,
- author='Gluster Community',
- author_email='gluster-devel@gluster.org',
- license='LGPLv3',
- url='http://gluster.org/',
- package_dir={'gluster':''},
- packages=['gluster']
-)
diff --git a/xlators/features/index/src/index-mem-types.h b/xlators/features/index/src/index-mem-types.h
index f5d456e84be..58833d0ec9b 100644
--- a/xlators/features/index/src/index-mem-types.h
+++ b/xlators/features/index/src/index-mem-types.h
@@ -8,16 +8,16 @@
cases as published by the Free Software Foundation.
*/
-#ifndef __QUIESCE_MEM_TYPES_H__
-#define __QUIESCE_MEM_TYPES_H__
+#ifndef __INDEX_MEM_TYPES_H__
+#define __INDEX_MEM_TYPES_H__
-#include "mem-types.h"
+#include <glusterfs/mem-types.h>
enum gf_index_mem_types_ {
gf_index_mt_priv_t = gf_common_mt_end + 1,
- gf_index_inode_ctx_t = gf_common_mt_end + 2,
- gf_index_fd_ctx_t = gf_common_mt_end + 3,
- gf_index_mt_local_t = gf_common_mt_end + 4,
+ gf_index_inode_ctx_t,
+ gf_index_fd_ctx_t,
+ gf_index_mt_local_t,
gf_index_mt_end
};
#endif
diff --git a/xlators/features/index/src/index-messages.h b/xlators/features/index/src/index-messages.h
index 3495fb080f0..364f17cd34e 100644
--- a/xlators/features/index/src/index-messages.h
+++ b/xlators/features/index/src/index-messages.h
@@ -11,7 +11,7 @@
#ifndef _INDEX_MESSAGES_H_
#define _INDEX_MESSAGES_H_
-#include "glfs-message-id.h"
+#include <glusterfs/glfs-message-id.h>
/* To add new message IDs, append new identifiers at the end of the list.
*
diff --git a/xlators/features/index/src/index.c b/xlators/features/index/src/index.c
index c487e8a5726..4abb2c73ce5 100644
--- a/xlators/features/index/src/index.c
+++ b/xlators/features/index/src/index.c
@@ -8,11 +8,11 @@
cases as published by the Free Software Foundation.
*/
#include "index.h"
-#include "options.h"
+#include <glusterfs/options.h>
#include "glusterfs3-xdr.h"
-#include "syscall.h"
-#include "syncop.h"
-#include "common-utils.h"
+#include <glusterfs/syscall.h>
+#include <glusterfs/syncop.h>
+#include <glusterfs/common-utils.h>
#include "index-messages.h"
#include <ftw.h>
#include <libgen.h> /* for dirname() */
@@ -1685,21 +1685,25 @@ index_get_gfid_type(void *opaque)
loc_wipe(&loc);
- entry->d_type = IA_INVAL;
+ entry->d_type = gf_d_type_from_ia_type(IA_INVAL);
+ entry->d_stat.ia_type = IA_INVAL;
if (gf_uuid_parse(entry->d_name, loc.gfid))
continue;
loc.inode = inode_find(args->parent->table, loc.gfid);
if (loc.inode) {
- entry->d_type = loc.inode->ia_type;
+ entry->d_stat.ia_type = loc.inode->ia_type;
+ entry->d_type = gf_d_type_from_ia_type(loc.inode->ia_type);
continue;
}
loc.inode = inode_new(args->parent->table);
if (!loc.inode)
continue;
ret = syncop_lookup(FIRST_CHILD(this), &loc, &iatt, 0, 0, 0);
- if (ret == 0)
- entry->d_type = iatt.ia_type;
+ if (ret == 0) {
+ entry->d_type = gf_d_type_from_ia_type(iatt.ia_type);
+ entry->d_stat = iatt;
+ }
}
loc_wipe(&loc);
@@ -2100,7 +2104,7 @@ index_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr_req)
worker_enqueue(this, stub);
return 0;
normal:
- ret = dict_get_str(xattr_req, "link-count", &flag);
+ ret = dict_get_str_sizen(xattr_req, "link-count", &flag);
if ((ret == 0) && (strcmp(flag, GF_XATTROP_INDEX_COUNT) == 0)) {
STACK_WIND(frame, index_lookup_cbk, FIRST_CHILD(this),
FIRST_CHILD(this)->fops->lookup, loc, xattr_req);
@@ -2474,6 +2478,7 @@ out:
GF_FREE(priv);
this->private = NULL;
mem_pool_destroy(this->local_pool);
+ this->local_pool = NULL;
}
if (attr_inited)
@@ -2587,7 +2592,7 @@ notify(xlator_t *this, int event, void *data, ...)
if ((event == GF_EVENT_PARENT_DOWN) && victim->cleanup_starting) {
stub_cnt = GF_ATOMIC_GET(priv->stub_cnt);
- clock_gettime(CLOCK_REALTIME, &sleep_till);
+ timespec_now_realtime(&sleep_till);
sleep_till.tv_sec += 1;
/* Wait for draining stub from queue before notify PARENT_DOWN */
@@ -2661,3 +2666,17 @@ struct volume_options options[] = {
.default_value = "trusted.afr.{{ volume.name }}"},
{.key = {NULL}},
};
+
+xlator_api_t xlator_api = {
+ .init = init,
+ .fini = fini,
+ .notify = notify,
+ .mem_acct_init = mem_acct_init,
+ .op_version = {1}, /* Present from the initial version */
+ .dumpops = &dumpops,
+ .fops = &fops,
+ .cbks = &cbks,
+ .options = options,
+ .identifier = "index",
+ .category = GF_MAINTAINED,
+};
diff --git a/xlators/features/index/src/index.h b/xlators/features/index/src/index.h
index 149cfd415b3..a2b6e6e2570 100644
--- a/xlators/features/index/src/index.h
+++ b/xlators/features/index/src/index.h
@@ -11,11 +11,11 @@
#ifndef __INDEX_H__
#define __INDEX_H__
-#include "xlator.h"
-#include "call-stub.h"
-#include "defaults.h"
-#include "byte-order.h"
-#include "common-utils.h"
+#include <glusterfs/xlator.h>
+#include <glusterfs/call-stub.h>
+#include <glusterfs/defaults.h>
+#include <glusterfs/byte-order.h>
+#include <glusterfs/common-utils.h>
#include "index-mem-types.h"
#define INDEX_THREAD_STACK_SIZE ((size_t)(1024 * 1024))
diff --git a/xlators/features/leases/src/leases-internal.c b/xlators/features/leases/src/leases-internal.c
index 2823ca5a262..56dee244281 100644
--- a/xlators/features/leases/src/leases-internal.c
+++ b/xlators/features/leases/src/leases-internal.c
@@ -73,7 +73,7 @@ out:
* timeout value(in seconds) set as an option to this xlator.
* -1 error case
*/
-int32_t
+static int32_t
get_recall_lease_timeout(xlator_t *this)
{
leases_private_t *priv = NULL;
@@ -356,9 +356,8 @@ out:
static lease_inode_t *
new_lease_inode(inode_t *inode)
{
- lease_inode_t *l_inode = NULL;
-
- l_inode = GF_CALLOC(1, sizeof(*l_inode), gf_leases_mt_lease_inode_t);
+ lease_inode_t *l_inode = GF_MALLOC(sizeof(*l_inode),
+ gf_leases_mt_lease_inode_t);
if (!l_inode)
goto out;
@@ -379,9 +378,8 @@ __destroy_lease_inode(lease_inode_t *l_inode)
static lease_client_t *
new_lease_client(const char *client_uid)
{
- lease_client_t *clnt = NULL;
-
- clnt = GF_CALLOC(1, sizeof(*clnt), gf_leases_mt_lease_client_t);
+ lease_client_t *clnt = GF_MALLOC(sizeof(*clnt),
+ gf_leases_mt_lease_client_t);
if (!clnt)
goto out;
@@ -448,29 +446,29 @@ out:
static int
add_inode_to_client_list(xlator_t *this, inode_t *inode, const char *client_uid)
{
- int ret = 0;
- leases_private_t *priv = NULL;
+ leases_private_t *priv = this->private;
lease_client_t *clnt = NULL;
- lease_inode_t *lease_inode = NULL;
- priv = this->private;
+ lease_inode_t *lease_inode = new_lease_inode(inode);
+ if (!lease_inode)
+ return -ENOMEM;
+
pthread_mutex_lock(&priv->mutex);
{
clnt = __get_or_new_lease_client(this, priv, client_uid);
- GF_CHECK_ALLOC(clnt, ret, out);
-
- lease_inode = new_lease_inode(inode);
- GF_CHECK_ALLOC(lease_inode, ret, out);
-
+ if (!clnt) {
+ pthread_mutex_unlock(&priv->mutex);
+ __destroy_lease_inode(lease_inode);
+ return -ENOMEM;
+ }
list_add_tail(&clnt->inode_list, &lease_inode->list);
- gf_msg_debug(this->name, 0,
- "Added a new inode:%p to the client(%s) "
- "cleanup list, gfid(%s)",
- inode, client_uid, uuid_utoa(inode->gfid));
}
-out:
pthread_mutex_unlock(&priv->mutex);
- return ret;
+ gf_msg_debug(this->name, 0,
+ "Added a new inode:%p to the client(%s) "
+ "cleanup list, gfid(%s)",
+ inode, client_uid, uuid_utoa(inode->gfid));
+ return 0;
}
/* Add lease entry to the corresponding client entry.
@@ -587,15 +585,17 @@ remove_from_clnt_list(xlator_t *this, const char *client_uid, inode_t *inode)
{
clnt = __get_lease_client(this, priv, client_uid);
if (!clnt) {
+ pthread_mutex_unlock(&priv->mutex);
gf_msg(this->name, GF_LOG_ERROR, 0, LEASE_MSG_CLNT_NOTFOUND,
"There is no client entry found in the cleanup list");
- pthread_mutex_unlock(&priv->mutex);
goto out;
}
ret = __remove_inode_from_clnt_list(this, clnt, inode);
if (ret) {
+ pthread_mutex_unlock(&priv->mutex);
gf_msg(this->name, GF_LOG_ERROR, 0, LEASE_MSG_INODE_NOTFOUND,
"There is no inode entry found in the cleanup list");
+ goto out;
}
}
pthread_mutex_unlock(&priv->mutex);
@@ -676,6 +676,7 @@ __remove_lease(xlator_t *this, inode_t *inode, lease_inode_ctx_t *lease_ctx,
if (lease_ctx->lease_cnt == 0 && lease_ctx->timer) {
ret = gf_tw_del_timer(priv->timer_wheel, lease_ctx->timer);
lease_ctx->recall_in_progress = _gf_false;
+ lease_ctx->timer = NULL;
}
out:
return ret;
@@ -853,20 +854,20 @@ recall_lease_timer_handler(struct gf_tw_timer_list *timer, void *data,
priv = timer_data->this->private;
inode = timer_data->inode;
+ lease_inode = new_lease_inode(inode);
+ if (!lease_inode) {
+ errno = ENOMEM;
+ goto out;
+ }
pthread_mutex_lock(&priv->mutex);
{
- lease_inode = new_lease_inode(inode);
- if (!lease_inode) {
- errno = ENOMEM;
- goto out;
- }
list_add_tail(&lease_inode->list, &priv->recall_list);
pthread_cond_broadcast(&priv->cond);
}
+ pthread_mutex_unlock(&priv->mutex);
out:
/* unref the inode_ref taken by timer_data in __recall_lease */
inode_unref(timer_data->inode);
- pthread_mutex_unlock(&priv->mutex);
GF_FREE(timer);
}
@@ -886,6 +887,7 @@ __recall_lease(xlator_t *this, lease_inode_ctx_t *lease_ctx)
struct gf_tw_timer_list *timer = NULL;
leases_private_t *priv = NULL;
lease_timer_data_t *timer_data = NULL;
+ time_t recall_time;
if (lease_ctx->recall_in_progress) {
gf_msg_debug(this->name, 0,
@@ -895,6 +897,7 @@ __recall_lease(xlator_t *this, lease_inode_ctx_t *lease_ctx)
}
priv = this->private;
+ recall_time = gf_time();
list_for_each_entry_safe(lease_entry, tmp, &lease_ctx->lease_id_list,
lease_id_list)
{
@@ -918,9 +921,9 @@ __recall_lease(xlator_t *this, lease_inode_ctx_t *lease_ctx)
}
lease_ctx->recall_in_progress = _gf_true;
- lease_entry->recall_time = time(NULL);
+ lease_entry->recall_time = recall_time;
}
- timer = GF_CALLOC(1, sizeof(*timer), gf_common_mt_tw_timer_list);
+ timer = GF_MALLOC(sizeof(*timer), gf_common_mt_tw_timer_list);
if (!timer) {
goto out;
}
@@ -1068,6 +1071,17 @@ __check_lease_conflict(call_frame_t *frame, lease_inode_ctx_t *lease_ctx,
goto recall;
}
+ /* As internal fops are used to maintain data integrity but do not
+ * make modifications to the client data, no need to conflict with
+ * them.
+ *
+ * @todo: like for locks, even lease state has to be handled by
+ * rebalance or self-heal daemon process. */
+ if (frame->root->pid < 0) {
+ conflicts = _gf_false;
+ goto recall;
+ }
+
/* If lease_id is not sent, set conflicts = true if there is
* an existing lease */
if (!lease_id && (lease_ctx->lease_cnt > 0)) {
@@ -1134,12 +1148,13 @@ check_lease_conflict(call_frame_t *frame, inode_t *inode, const char *lease_id,
pthread_mutex_lock(&lease_ctx->lock);
{
if (lease_ctx->lease_type == NONE) {
+ pthread_mutex_unlock(&lease_ctx->lock);
gf_msg_debug(frame->this->name, 0,
"No leases found continuing with the"
" fop:%s",
gf_fop_list[frame->root->op]);
ret = WIND_FOP;
- goto unlock;
+ goto out;
}
conflicts = __check_lease_conflict(frame, lease_ctx, lease_id,
is_write_fop);
@@ -1166,7 +1181,6 @@ check_lease_conflict(call_frame_t *frame, inode_t *inode, const char *lease_id,
}
}
}
-unlock:
pthread_mutex_unlock(&lease_ctx->lock);
out:
return ret;
@@ -1343,6 +1357,7 @@ expired_recall_cleanup(void *data)
lease_inode_t *tmp = NULL;
leases_private_t *priv = NULL;
xlator_t *this = NULL;
+ time_t time_now;
GF_VALIDATE_OR_GOTO("leases", data, out);
@@ -1352,6 +1367,7 @@ expired_recall_cleanup(void *data)
gf_msg_debug(this->name, 0, "Started the expired_recall_cleanup thread");
while (1) {
+ time_now = gf_time();
pthread_mutex_lock(&priv->mutex);
{
if (priv->fini) {
@@ -1360,7 +1376,7 @@ expired_recall_cleanup(void *data)
}
INIT_LIST_HEAD(&recall_cleanup_list);
if (list_empty(&priv->recall_list)) {
- sleep_till.tv_sec = time(NULL) + 600;
+ sleep_till.tv_sec = time_now + 600;
pthread_cond_timedwait(&priv->cond, &priv->mutex, &sleep_till);
}
if (!list_empty(&priv->recall_list)) {
diff --git a/xlators/features/leases/src/leases-mem-types.h b/xlators/features/leases/src/leases-mem-types.h
index 59d3cbaf0b3..25664b44156 100644
--- a/xlators/features/leases/src/leases-mem-types.h
+++ b/xlators/features/leases/src/leases-mem-types.h
@@ -11,11 +11,10 @@
#ifndef __LEASES_MEM_TYPES_H__
#define __LEASES_MEM_TYPES_H__
-#include "mem-types.h"
+#include <glusterfs/mem-types.h>
enum gf_leases_mem_types_ {
- gf_leases_mt_conf_t = gf_common_mt_end + 1,
- gf_leases_mt_private_t,
+ gf_leases_mt_private_t = gf_common_mt_end + 1,
gf_leases_mt_lease_client_t,
gf_leases_mt_lease_inode_t,
gf_leases_mt_fd_ctx_t,
diff --git a/xlators/features/leases/src/leases-messages.h b/xlators/features/leases/src/leases-messages.h
index 81a517f63cd..da696b832de 100644
--- a/xlators/features/leases/src/leases-messages.h
+++ b/xlators/features/leases/src/leases-messages.h
@@ -11,7 +11,7 @@
#ifndef _LEASES_MESSAGES_H_
#define _LEASES_MESSAGES_H_
-#include "glfs-message-id.h"
+#include <glusterfs/glfs-message-id.h>
/* To add new message IDs, append new identifiers at the end of the list.
*
diff --git a/xlators/features/leases/src/leases.c b/xlators/features/leases/src/leases.c
index baeb8c7361c..04bee50ba3f 100644
--- a/xlators/features/leases/src/leases.c
+++ b/xlators/features/leases/src/leases.c
@@ -35,6 +35,7 @@ leases_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
char *lease_id = NULL;
EXIT_IF_LEASES_OFF(this, out);
+ EXIT_IF_INTERNAL_FOP(frame, xdata, out);
fd_ctx = GF_CALLOC(1, sizeof(*fd_ctx), gf_leases_mt_fd_ctx_t);
if (!fd_ctx) {
@@ -109,6 +110,7 @@ leases_writev(call_frame_t *frame, xlator_t *this, fd_t *fd,
int ret = 0;
EXIT_IF_LEASES_OFF(this, out);
+ EXIT_IF_INTERNAL_FOP(frame, xdata, out);
GET_LEASE_ID(xdata, lease_id, frame->root->client->client_uid);
GET_FLAGS(frame->root->op, fd->flags);
@@ -157,6 +159,7 @@ leases_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
int ret = 0;
EXIT_IF_LEASES_OFF(this, out);
+ EXIT_IF_INTERNAL_FOP(frame, xdata, out);
GET_LEASE_ID(xdata, lease_id, frame->root->client->client_uid);
GET_FLAGS(frame->root->op, fd->flags);
@@ -202,6 +205,7 @@ leases_lk(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd,
int ret = 0;
EXIT_IF_LEASES_OFF(this, out);
+ EXIT_IF_INTERNAL_FOP(frame, xdata, out);
GET_LEASE_ID(xdata, lease_id, frame->root->client->client_uid);
GET_FLAGS_LK(cmd, flock->l_type, fd->flags);
@@ -240,6 +244,7 @@ leases_lease(call_frame_t *frame, xlator_t *this, loc_t *loc,
int32_t op_ret = 0;
EXIT_IF_LEASES_OFF(this, out);
+ EXIT_IF_INTERNAL_FOP(frame, xdata, out);
ret = process_lease_req(frame, this, loc->inode, lease);
if (ret < 0) {
@@ -282,6 +287,7 @@ leases_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset,
int ret = 0;
EXIT_IF_LEASES_OFF(this, out);
+ EXIT_IF_INTERNAL_FOP(frame, xdata, out);
GET_LEASE_ID(xdata, lease_id, frame->root->client->client_uid);
GET_FLAGS(frame->root->op, 0);
@@ -328,6 +334,7 @@ leases_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
int ret = 0;
EXIT_IF_LEASES_OFF(this, out);
+ EXIT_IF_INTERNAL_FOP(frame, xdata, out);
GET_LEASE_ID(xdata, lease_id, frame->root->client->client_uid);
GET_FLAGS(frame->root->op, 0);
@@ -376,6 +383,7 @@ leases_rename(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
int ret = 0;
EXIT_IF_LEASES_OFF(this, out);
+ EXIT_IF_INTERNAL_FOP(frame, xdata, out);
/* should the lease be also checked for newloc */
GET_LEASE_ID(xdata, lease_id, frame->root->client->client_uid);
@@ -424,6 +432,7 @@ leases_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag,
int ret = 0;
EXIT_IF_LEASES_OFF(this, out);
+ EXIT_IF_INTERNAL_FOP(frame, xdata, out);
GET_LEASE_ID(xdata, lease_id, frame->root->client->client_uid);
GET_FLAGS(frame->root->op, 0);
@@ -470,6 +479,7 @@ leases_link(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
int ret = 0;
EXIT_IF_LEASES_OFF(this, out);
+ EXIT_IF_INTERNAL_FOP(frame, xdata, out);
GET_LEASE_ID(xdata, lease_id, frame->root->client->client_uid);
GET_FLAGS(frame->root->op, 0);
@@ -516,6 +526,7 @@ leases_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
int ret = 0;
EXIT_IF_LEASES_OFF(this, out);
+ EXIT_IF_INTERNAL_FOP(frame, xdata, out);
GET_LEASE_ID(xdata, lease_id, frame->root->client->client_uid);
GET_FLAGS(frame->root->op, flags);
@@ -563,6 +574,7 @@ leases_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags,
int ret = 0;
EXIT_IF_LEASES_OFF(this, out);
+ EXIT_IF_INTERNAL_FOP(frame, xdata, out);
GET_LEASE_ID(xdata, lease_id, frame->root->client->client_uid);
GET_FLAGS(frame->root->op, fd->flags);
@@ -607,6 +619,7 @@ leases_ftruncate(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
int ret = 0;
EXIT_IF_LEASES_OFF(this, out);
+ EXIT_IF_INTERNAL_FOP(frame, xdata, out);
GET_LEASE_ID(xdata, lease_id, frame->root->client->client_uid);
GET_FLAGS(frame->root->op, 0); /* TODO:fd->flags?*/
@@ -652,6 +665,7 @@ leases_fsetattr(call_frame_t *frame, xlator_t *this, fd_t *fd,
int ret = 0;
EXIT_IF_LEASES_OFF(this, out);
+ EXIT_IF_INTERNAL_FOP(frame, xdata, out);
GET_LEASE_ID(xdata, lease_id, frame->root->client->client_uid);
GET_FLAGS(frame->root->op, fd->flags);
@@ -697,6 +711,7 @@ leases_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t mode,
int ret = 0;
EXIT_IF_LEASES_OFF(this, out);
+ EXIT_IF_INTERNAL_FOP(frame, xdata, out);
GET_LEASE_ID(xdata, lease_id, frame->root->client->client_uid);
GET_FLAGS(frame->root->op, fd->flags);
@@ -744,6 +759,7 @@ leases_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
int ret = 0;
EXIT_IF_LEASES_OFF(this, out);
+ EXIT_IF_INTERNAL_FOP(frame, xdata, out);
GET_LEASE_ID(xdata, lease_id, frame->root->client->client_uid);
GET_FLAGS(frame->root->op, fd->flags);
@@ -789,6 +805,7 @@ leases_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
int ret = 0;
EXIT_IF_LEASES_OFF(this, out);
+ EXIT_IF_INTERNAL_FOP(frame, xdata, out);
GET_LEASE_ID(xdata, lease_id, frame->root->client->client_uid);
GET_FLAGS(frame->root->op, fd->flags);
@@ -834,6 +851,7 @@ leases_flush(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
uint64_t ctx = 0;
EXIT_IF_LEASES_OFF(this, out);
+ EXIT_IF_INTERNAL_FOP(frame, xdata, out);
GET_LEASE_ID(xdata, lease_id, frame->root->client->client_uid);
GET_FLAGS(frame->root->op, fd->flags);
@@ -1000,14 +1018,14 @@ out:
return ret;
}
-int
+void
fini(xlator_t *this)
{
leases_private_t *priv = NULL;
priv = this->private;
if (!priv) {
- return 0;
+ return;
}
this->private = NULL;
@@ -1019,12 +1037,12 @@ fini(xlator_t *this)
priv->inited_recall_thr = _gf_false;
}
- GF_FREE(priv);
- if (this->ctx->tw) {
+ if (priv->timer_wheel) {
glusterfs_ctx_tw_put(this->ctx);
- this->ctx->tw = NULL;
}
- return 0;
+
+ GF_FREE(priv);
+ return;
}
static int
@@ -1135,3 +1153,16 @@ struct volume_options options[] = {
" will be forcefully purged by the server."},
{.key = {NULL}},
};
+
+xlator_api_t xlator_api = {
+ .init = init,
+ .fini = fini,
+ .reconfigure = reconfigure,
+ .mem_acct_init = mem_acct_init,
+ .op_version = {1}, /* Present from the initial version */
+ .fops = &fops,
+ .cbks = &cbks,
+ .options = options,
+ .identifier = "leases",
+ .category = GF_MAINTAINED,
+};
diff --git a/xlators/features/leases/src/leases.h b/xlators/features/leases/src/leases.h
index 6ac712b0bb0..a6e8a6824cc 100644
--- a/xlators/features/leases/src/leases.h
+++ b/xlators/features/leases/src/leases.h
@@ -16,15 +16,15 @@
#include "config.h"
#endif
-#include "common-utils.h"
-#include "glusterfs.h"
-#include "xlator.h"
-#include "call-stub.h"
-#include "logging.h"
-#include "client_t.h"
-#include "lkowner.h"
-#include "locking.h"
-#include "upcall-utils.h"
+#include <glusterfs/common-utils.h>
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/xlator.h>
+#include <glusterfs/call-stub.h>
+#include <glusterfs/logging.h>
+#include <glusterfs/client_t.h>
+#include <glusterfs/lkowner.h>
+#include <glusterfs/locking.h>
+#include <glusterfs/upcall-utils.h>
#include "timer-wheel.h"
#include "leases-mem-types.h"
#include "leases-messages.h"
@@ -45,6 +45,14 @@
goto label; \
} while (0)
+#define EXIT_IF_INTERNAL_FOP(frame, xdata, label) \
+ do { \
+ if (frame->root->pid < 0) \
+ goto label; \
+ if (xdata && dict_get(xdata, GLUSTERFS_INTERNAL_FOP_KEY)) \
+ goto label; \
+ } while (0)
+
#define GET_LEASE_ID(xdata, lease_id, client_uid) \
do { \
int ret_val = -1; \
@@ -144,17 +152,19 @@
} while (0)
struct _leases_private {
- gf_boolean_t leases_enabled;
- int32_t recall_lease_timeout;
struct list_head client_list;
struct list_head recall_list;
struct tvec_base *timer_wheel; /* timer wheel where the recall request
is qued and waits for unlock/expiry */
- gf_boolean_t fini;
pthread_t recall_thr;
- gf_boolean_t inited_recall_thr;
pthread_mutex_t mutex;
pthread_cond_t cond;
+ int32_t recall_lease_timeout;
+ gf_boolean_t inited_recall_thr;
+ gf_boolean_t fini;
+ gf_boolean_t leases_enabled;
+
+ char _pad[1]; /* manual padding */
};
typedef struct _leases_private leases_private_t;
@@ -181,18 +191,20 @@ typedef struct _lease_fd_ctx lease_fd_ctx_t;
struct _lease_inode_ctx {
struct list_head lease_id_list; /* clients that have taken leases */
int lease_type_cnt[GF_LEASE_MAX_TYPE + 1];
+ uint64_t lease_cnt; /* Total number of leases on this inode */
+ uint64_t openfd_cnt; /* number of fds open */
+ struct list_head blocked_list; /* List of fops blocked until the
+ lease recall is complete */
+ inode_t *inode; /* this represents the inode on which the
+ lock was taken, required mainly during
+ disconnect cleanup */
+ struct gf_tw_timer_list *timer;
+ pthread_mutex_t lock;
int lease_type; /* Types of leases acquired */
- uint64_t lease_cnt; /* Total number of leases on this inode */
- uint64_t openfd_cnt; /* number of fds open */
gf_boolean_t recall_in_progress; /* if lease recall is sent on this inode */
gf_boolean_t blocked_fops_resuming; /* if blocked fops are being resumed */
- struct list_head blocked_list; /* List of fops blocked until the
- lease recall is complete */
- inode_t *inode; /* this represents the inode on which the
- lock was taken, required mainly during
- disconnect cleanup */
- struct gf_tw_timer_list *timer;
- pthread_mutex_t lock;
+
+ char _pad[2]; /* manual padding */
};
typedef struct _lease_inode_ctx lease_inode_ctx_t;
@@ -202,11 +214,12 @@ struct _lease_id_entry {
char *client_uid; /* uid of the client that has
taken the lease */
int lease_type_cnt[GF_LEASE_MAX_TYPE + 1]; /* count of each lease type */
- int lease_type; /* Union of all the leases taken
- under the given lease id */
uint64_t lease_cnt; /* Number of leases taken under the
given lease id */
time_t recall_time; /* time @ which recall was sent */
+ int lease_type; /* Union of all the leases taken
+ under the given lease id */
+ char _pad[4]; /* manual padding */
};
typedef struct _lease_id_entry lease_id_entry_t;
@@ -226,9 +239,6 @@ typedef struct __lease_timer_data lease_timer_data_t;
gf_boolean_t
is_leases_enabled(xlator_t *this);
-int32_t
-get_recall_lease_timeout(xlator_t *this);
-
lease_inode_ctx_t *
lease_ctx_get(inode_t *inode, xlator_t *this);
diff --git a/xlators/features/locks/src/clear.c b/xlators/features/locks/src/clear.c
index 0966ee753d6..ab1eac68a53 100644
--- a/xlators/features/locks/src/clear.c
+++ b/xlators/features/locks/src/clear.c
@@ -12,17 +12,23 @@
#include <limits.h>
#include <pthread.h>
-#include "glusterfs.h"
-#include "compat.h"
-#include "xlator.h"
-#include "logging.h"
-#include "common-utils.h"
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/compat.h>
+#include <glusterfs/xlator.h>
+#include <glusterfs/logging.h>
+#include <glusterfs/common-utils.h>
#include "locks.h"
#include "common.h"
-#include "statedump.h"
+#include <glusterfs/statedump.h>
#include "clear.h"
+const char *clrlk_type_names[CLRLK_TYPE_MAX] = {
+ [CLRLK_INODE] = "inode",
+ [CLRLK_ENTRY] = "entry",
+ [CLRLK_POSIX] = "posix",
+};
+
int
clrlk_get_kind(char *kind)
{
@@ -175,9 +181,9 @@ clrlk_clear_posixlk(xlator_t *this, pl_inode_t *pl_inode, clrlk_args *args,
if (plock->blocked) {
bcount++;
pl_trace_out(this, plock->frame, NULL, NULL, F_SETLKW,
- &plock->user_flock, -1, EAGAIN, NULL);
+ &plock->user_flock, -1, EINTR, NULL);
- STACK_UNWIND_STRICT(lk, plock->frame, -1, EAGAIN,
+ STACK_UNWIND_STRICT(lk, plock->frame, -1, EINTR,
&plock->user_flock, NULL);
} else {
@@ -254,14 +260,16 @@ blkd:
}
pthread_mutex_unlock(&pl_inode->mutex);
- list_for_each_entry_safe(ilock, tmp, &released, blocked_locks)
- {
- list_del_init(&ilock->blocked_locks);
- pl_trace_out(this, ilock->frame, NULL, NULL, F_SETLKW,
- &ilock->user_flock, -1, EAGAIN, ilock->volume);
- STACK_UNWIND_STRICT(inodelk, ilock->frame, -1, EAGAIN, NULL);
- // No need to take lock as the locks are only in one list
- __pl_inodelk_unref(ilock);
+ if (!list_empty(&released)) {
+ list_for_each_entry_safe(ilock, tmp, &released, blocked_locks)
+ {
+ list_del_init(&ilock->blocked_locks);
+ pl_trace_out(this, ilock->frame, NULL, NULL, F_SETLKW,
+ &ilock->user_flock, -1, EAGAIN, ilock->volume);
+ STACK_UNWIND_STRICT(inodelk, ilock->frame, -1, EAGAIN, NULL);
+ // No need to take lock as the locks are only in one list
+ __pl_inodelk_unref(ilock);
+ }
}
if (!(args->kind & CLRLK_GRANTED)) {
@@ -357,15 +365,17 @@ blkd:
}
pthread_mutex_unlock(&pl_inode->mutex);
- list_for_each_entry_safe(elock, tmp, &released, blocked_locks)
- {
- list_del_init(&elock->blocked_locks);
- entrylk_trace_out(this, elock->frame, elock->volume, NULL, NULL,
- elock->basename, ENTRYLK_LOCK, elock->type, -1,
- EAGAIN);
- STACK_UNWIND_STRICT(entrylk, elock->frame, -1, EAGAIN, NULL);
+ if (!list_empty(&released)) {
+ list_for_each_entry_safe(elock, tmp, &released, blocked_locks)
+ {
+ list_del_init(&elock->blocked_locks);
+ entrylk_trace_out(this, elock->frame, elock->volume, NULL, NULL,
+ elock->basename, ENTRYLK_LOCK, elock->type, -1,
+ EAGAIN);
+ STACK_UNWIND_STRICT(entrylk, elock->frame, -1, EAGAIN, NULL);
- __pl_entrylk_unref(elock);
+ __pl_entrylk_unref(elock);
+ }
}
if (!(args->kind & CLRLK_GRANTED)) {
diff --git a/xlators/features/locks/src/clear.h b/xlators/features/locks/src/clear.h
index 08662746f98..bc118cb1b81 100644
--- a/xlators/features/locks/src/clear.h
+++ b/xlators/features/locks/src/clear.h
@@ -10,9 +10,9 @@
#ifndef __CLEAR_H__
#define __CLEAR_H__
-#include "compat-errno.h"
-#include "stack.h"
-#include "call-stub.h"
+#include <glusterfs/compat-errno.h>
+#include <glusterfs/stack.h>
+#include <glusterfs/call-stub.h>
#include "locks.h"
typedef enum {
@@ -22,6 +22,8 @@ typedef enum {
CLRLK_TYPE_MAX
} clrlk_type;
+extern const char *clrlk_type_names[];
+
typedef enum {
CLRLK_BLOCKED = 1,
CLRLK_GRANTED,
diff --git a/xlators/features/locks/src/common.c b/xlators/features/locks/src/common.c
index 5ad5415ed79..a2c6be93e03 100644
--- a/xlators/features/locks/src/common.c
+++ b/xlators/features/locks/src/common.c
@@ -12,11 +12,10 @@
#include <limits.h>
#include <pthread.h>
-#include "glusterfs.h"
-#include "compat.h"
-#include "xlator.h"
-#include "logging.h"
-#include "common-utils.h"
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/compat.h>
+#include <glusterfs/logging.h>
+#include <glusterfs/syncop.h>
#include "locks.h"
#include "common.h"
@@ -213,13 +212,11 @@ void
pl_trace_in(xlator_t *this, call_frame_t *frame, fd_t *fd, loc_t *loc, int cmd,
struct gf_flock *flock, const char *domain)
{
- posix_locks_private_t *priv = NULL;
+ posix_locks_private_t *priv = this->private;
char pl_locker[256];
char pl_lockee[256];
char pl_lock[256];
- priv = this->private;
-
if (!priv->trace)
return;
@@ -291,13 +288,11 @@ pl_trace_block(xlator_t *this, call_frame_t *frame, fd_t *fd, loc_t *loc,
int cmd, struct gf_flock *flock, const char *domain)
{
- posix_locks_private_t *priv = NULL;
+ posix_locks_private_t *priv = this->private;
char pl_locker[256];
char pl_lockee[256];
char pl_lock[256];
- priv = this->private;
-
if (!priv->trace)
return;
@@ -326,7 +321,7 @@ pl_trace_flush(xlator_t *this, call_frame_t *frame, fd_t *fd)
if (!priv->trace)
return;
- pl_inode = pl_inode_get(this, fd->inode);
+ pl_inode = pl_inode_get(this, fd->inode, NULL);
if (pl_inode && __pl_inode_is_empty(pl_inode))
return;
@@ -362,7 +357,7 @@ pl_update_refkeeper(xlator_t *this, inode_t *inode)
int need_unref = 0;
int need_ref = 0;
- pl_inode = pl_inode_get(this, inode);
+ pl_inode = pl_inode_get(this, inode, NULL);
if (!pl_inode)
return;
@@ -389,8 +384,51 @@ pl_update_refkeeper(xlator_t *this, inode_t *inode)
inode_ref(inode);
}
+/* Get lock enforcement info from disk */
+int
+pl_fetch_mlock_info_from_disk(xlator_t *this, pl_inode_t *pl_inode,
+ pl_local_t *local)
+{
+ dict_t *xdata_rsp = NULL;
+ int ret = 0;
+ int op_ret = 0;
+
+ if (!local) {
+ return -1;
+ }
+
+ if (local->fd) {
+ op_ret = syncop_fgetxattr(this, local->fd, &xdata_rsp,
+ GF_ENFORCE_MANDATORY_LOCK, NULL, NULL);
+ } else {
+ op_ret = syncop_getxattr(this, &local->loc[0], &xdata_rsp,
+ GF_ENFORCE_MANDATORY_LOCK, NULL, NULL);
+ }
+
+ pthread_mutex_lock(&pl_inode->mutex);
+ {
+ if (op_ret >= 0) {
+ pl_inode->mlock_enforced = _gf_true;
+ pl_inode->check_mlock_info = _gf_false;
+ } else {
+ gf_msg(this->name, GF_LOG_WARNING, -op_ret, 0,
+ "getxattr failed with %d", op_ret);
+ pl_inode->mlock_enforced = _gf_false;
+
+ if (-op_ret == ENODATA) {
+ pl_inode->check_mlock_info = _gf_false;
+ } else {
+ pl_inode->check_mlock_info = _gf_true;
+ }
+ }
+ }
+ pthread_mutex_unlock(&pl_inode->mutex);
+
+ return ret;
+}
+
pl_inode_t *
-pl_inode_get(xlator_t *this, inode_t *inode)
+pl_inode_get(xlator_t *this, inode_t *inode, pl_local_t *local)
{
uint64_t tmp_pl_inode = 0;
pl_inode_t *pl_inode = NULL;
@@ -403,6 +441,7 @@ pl_inode_get(xlator_t *this, inode_t *inode)
pl_inode = (pl_inode_t *)(long)tmp_pl_inode;
goto unlock;
}
+
pl_inode = GF_CALLOC(1, sizeof(*pl_inode), gf_locks_mt_pl_inode_t);
if (!pl_inode) {
goto unlock;
@@ -411,6 +450,7 @@ pl_inode_get(xlator_t *this, inode_t *inode)
gf_log(this->name, GF_LOG_TRACE, "Allocating new pl inode");
pthread_mutex_init(&pl_inode->mutex, NULL);
+ pthread_cond_init(&pl_inode->check_fop_wind_count, 0);
INIT_LIST_HEAD(&pl_inode->dom_list);
INIT_LIST_HEAD(&pl_inode->ext_list);
@@ -420,8 +460,16 @@ pl_inode_get(xlator_t *this, inode_t *inode)
INIT_LIST_HEAD(&pl_inode->blocked_calls);
INIT_LIST_HEAD(&pl_inode->metalk_list);
INIT_LIST_HEAD(&pl_inode->queued_locks);
+ INIT_LIST_HEAD(&pl_inode->waiting);
gf_uuid_copy(pl_inode->gfid, inode->gfid);
+ pl_inode->check_mlock_info = _gf_true;
+ pl_inode->mlock_enforced = _gf_false;
+
+ /* -2 means never looked up. -1 means something went wrong and link
+ * tracking is disabled. */
+ pl_inode->links = -2;
+
ret = __inode_ctx_put(inode, this, (uint64_t)(long)(pl_inode));
if (ret) {
pthread_mutex_destroy(&pl_inode->mutex);
@@ -433,13 +481,23 @@ pl_inode_get(xlator_t *this, inode_t *inode)
unlock:
UNLOCK(&inode->lock);
+ if ((pl_inode != NULL) && pl_is_mandatory_locking_enabled(pl_inode) &&
+ pl_inode->check_mlock_info && local) {
+ /* Note: The lock enforcement information per file can be stored in the
+ attribute flag of stat(x) in posix. With that there won't be a need
+ for doing a getxattr after a reboot
+ */
+ pl_fetch_mlock_info_from_disk(this, pl_inode, local);
+ }
+
return pl_inode;
}
/* Create a new posix_lock_t */
posix_lock_t *
new_posix_lock(struct gf_flock *flock, client_t *client, pid_t client_pid,
- gf_lkowner_t *owner, fd_t *fd, uint32_t lk_flags, int blocking)
+ gf_lkowner_t *owner, fd_t *fd, uint32_t lk_flags, int blocking,
+ int32_t *op_errno)
{
posix_lock_t *lock = NULL;
@@ -447,8 +505,14 @@ new_posix_lock(struct gf_flock *flock, client_t *client, pid_t client_pid,
GF_VALIDATE_OR_GOTO("posix-locks", client, out);
GF_VALIDATE_OR_GOTO("posix-locks", fd, out);
+ if (!pl_is_lk_owner_valid(owner, client)) {
+ *op_errno = EINVAL;
+ goto out;
+ }
+
lock = GF_CALLOC(1, sizeof(posix_lock_t), gf_locks_mt_posix_lock_t);
if (!lock) {
+ *op_errno = ENOMEM;
goto out;
}
@@ -466,6 +530,7 @@ new_posix_lock(struct gf_flock *flock, client_t *client, pid_t client_pid,
if (lock->client_uid == NULL) {
GF_FREE(lock);
lock = NULL;
+ *op_errno = ENOMEM;
goto out;
}
@@ -540,13 +605,11 @@ static void
__insert_lock(pl_inode_t *pl_inode, posix_lock_t *lock)
{
if (lock->blocked)
- gettimeofday(&lock->blkd_time, NULL);
+ lock->blkd_time = gf_time();
else
- gettimeofday(&lock->granted_time, NULL);
+ lock->granted_time = gf_time();
list_add_tail(&lock->list, &pl_inode->ext_list);
-
- return;
}
/* Return true if the locks overlap, false otherwise */
@@ -902,7 +965,7 @@ grant_blocked_locks(xlator_t *this, pl_inode_t *pl_inode)
struct list_head granted_list;
posix_lock_t *tmp = NULL;
posix_lock_t *lock = NULL;
-
+ pl_local_t *local = NULL;
INIT_LIST_HEAD(&granted_list);
pthread_mutex_lock(&pl_inode->mutex);
@@ -917,9 +980,9 @@ grant_blocked_locks(xlator_t *this, pl_inode_t *pl_inode)
pl_trace_out(this, lock->frame, NULL, NULL, F_SETLKW, &lock->user_flock,
0, 0, NULL);
-
- STACK_UNWIND_STRICT(lk, lock->frame, 0, 0, &lock->user_flock, NULL);
-
+ local = lock->frame->local;
+ PL_STACK_UNWIND_AND_FREE(local, lk, lock->frame, 0, 0,
+ &lock->user_flock, NULL);
__destroy_lock(lock);
}
@@ -934,10 +997,12 @@ pl_send_prelock_unlock(xlator_t *this, pl_inode_t *pl_inode,
0,
};
posix_lock_t *unlock_lock = NULL;
+ int32_t op_errno = 0;
struct list_head granted_list;
posix_lock_t *tmp = NULL;
posix_lock_t *lock = NULL;
+ pl_local_t *local = NULL;
int ret = -1;
@@ -951,7 +1016,7 @@ pl_send_prelock_unlock(xlator_t *this, pl_inode_t *pl_inode,
unlock_lock = new_posix_lock(&flock, old_lock->client, old_lock->client_pid,
&old_lock->owner, old_lock->fd,
- old_lock->lk_flags, 0);
+ old_lock->lk_flags, 0, &op_errno);
GF_VALIDATE_OR_GOTO(this->name, unlock_lock, out);
ret = 0;
@@ -965,9 +1030,9 @@ pl_send_prelock_unlock(xlator_t *this, pl_inode_t *pl_inode,
pl_trace_out(this, lock->frame, NULL, NULL, F_SETLKW, &lock->user_flock,
0, 0, NULL);
-
- STACK_UNWIND_STRICT(lk, lock->frame, 0, 0, &lock->user_flock, NULL);
-
+ local = lock->frame->local;
+ PL_STACK_UNWIND_AND_FREE(local, lk, lock->frame, 0, 0,
+ &lock->user_flock, NULL);
__destroy_lock(lock);
}
@@ -1002,7 +1067,7 @@ pl_setlk(xlator_t *this, pl_inode_t *pl_inode, posix_lock_t *lock,
if (__is_lock_grantable(pl_inode, lock)) {
if (pl_metalock_is_active(pl_inode)) {
- __pl_queue_lock(pl_inode, lock, can_block);
+ __pl_queue_lock(pl_inode, lock);
pthread_mutex_unlock(&pl_inode->mutex);
ret = -2;
goto out;
@@ -1015,7 +1080,7 @@ pl_setlk(xlator_t *this, pl_inode_t *pl_inode, posix_lock_t *lock,
__insert_and_merge(pl_inode, lock);
} else if (can_block) {
if (pl_metalock_is_active(pl_inode)) {
- __pl_queue_lock(pl_inode, lock, can_block);
+ __pl_queue_lock(pl_inode, lock);
pthread_mutex_unlock(&pl_inode->mutex);
ret = -2;
goto out;
@@ -1026,6 +1091,10 @@ pl_setlk(xlator_t *this, pl_inode_t *pl_inode, posix_lock_t *lock,
lock->fl_type == F_UNLCK ? "Unlock" : "Lock",
lock->client_pid, lkowner_utoa(&lock->owner),
lock->user_flock.l_start, lock->user_flock.l_len);
+
+ pl_trace_block(this, lock->frame, NULL, NULL, F_SETLKW,
+ &lock->user_flock, NULL);
+
lock->blocked = 1;
__insert_lock(pl_inode, lock);
ret = -1;
@@ -1052,10 +1121,7 @@ out:
posix_lock_t *
pl_getlk(pl_inode_t *pl_inode, posix_lock_t *lock)
{
- posix_lock_t *conf = NULL;
-
- conf = first_conflicting_overlap(pl_inode, lock);
-
+ posix_lock_t *conf = first_conflicting_overlap(pl_inode, lock);
if (conf == NULL) {
lock->fl_type = F_UNLCK;
return lock;
@@ -1077,3 +1143,449 @@ pl_does_monkey_want_stuck_lock()
return _gf_true;
return _gf_false;
}
+
+int
+pl_lock_preempt(pl_inode_t *pl_inode, posix_lock_t *reqlock)
+{
+ posix_lock_t *lock = NULL;
+ posix_lock_t *i = NULL;
+ pl_rw_req_t *rw = NULL;
+ pl_rw_req_t *itr = NULL;
+ struct list_head unwind_blist = {
+ 0,
+ };
+ struct list_head unwind_rw_list = {
+ 0,
+ };
+ int ret = 0;
+
+ INIT_LIST_HEAD(&unwind_blist);
+ INIT_LIST_HEAD(&unwind_rw_list);
+
+ pthread_mutex_lock(&pl_inode->mutex);
+ {
+ /*
+ - go through the lock list
+ - remove all locks from different owners
+ - same owner locks will be added or subtracted based on
+ the new request
+ - add the new lock
+ */
+ list_for_each_entry_safe(lock, i, &pl_inode->ext_list, list)
+ {
+ if (lock->blocked) {
+ list_del_init(&lock->list);
+ list_add(&lock->list, &unwind_blist);
+ continue;
+ }
+
+ if (locks_overlap(lock, reqlock)) {
+ if (same_owner(lock, reqlock))
+ continue;
+
+ /* remove conflicting locks */
+ list_del_init(&lock->list);
+ __delete_lock(lock);
+ __destroy_lock(lock);
+ }
+ }
+
+ __insert_and_merge(pl_inode, reqlock);
+
+ list_for_each_entry_safe(rw, itr, &pl_inode->rw_list, list)
+ {
+ list_del_init(&rw->list);
+ list_add(&rw->list, &unwind_rw_list);
+ }
+ }
+ pthread_mutex_unlock(&pl_inode->mutex);
+
+ /* unwind blocked locks */
+ list_for_each_entry_safe(lock, i, &unwind_blist, list)
+ {
+ PL_STACK_UNWIND_AND_FREE(((pl_local_t *)lock->frame->local), lk,
+ lock->frame, -1, EBUSY, &lock->user_flock,
+ NULL);
+ __destroy_lock(lock);
+ }
+
+ /* unwind blocked IOs */
+ list_for_each_entry_safe(rw, itr, &unwind_rw_list, list)
+ {
+ pl_clean_local(rw->stub->frame->local);
+ call_unwind_error(rw->stub, -1, EBUSY);
+ }
+
+ return ret;
+}
+
+/* Return true in case we need to ensure mandatory-locking
+ * semantics under different modes.
+ */
+gf_boolean_t
+pl_is_mandatory_locking_enabled(pl_inode_t *pl_inode)
+{
+ posix_locks_private_t *priv = THIS->private;
+
+ if (priv->mandatory_mode == MLK_FILE_BASED && pl_inode->mandatory)
+ return _gf_true;
+ else if (priv->mandatory_mode == MLK_FORCED ||
+ priv->mandatory_mode == MLK_OPTIMAL)
+ return _gf_true;
+
+ return _gf_false;
+}
+
+void
+pl_clean_local(pl_local_t *local)
+{
+ if (!local)
+ return;
+
+ if (local->inodelk_dom_count_req)
+ data_unref(local->inodelk_dom_count_req);
+ loc_wipe(&local->loc[0]);
+ loc_wipe(&local->loc[1]);
+ if (local->fd)
+ fd_unref(local->fd);
+ if (local->inode)
+ inode_unref(local->inode);
+ mem_put(local);
+}
+
+/*
+TODO: detach local initialization from PL_LOCAL_GET_REQUESTS and add it here
+*/
+int
+pl_local_init(call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd)
+{
+ pl_local_t *local = NULL;
+
+ if (!loc && !fd) {
+ return -1;
+ }
+
+ if (!frame->local) {
+ local = mem_get0(this->local_pool);
+ if (!local) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, 0,
+ "mem allocation failed");
+ return -1;
+ }
+
+ local->inode = (loc ? inode_ref(loc->inode) : inode_ref(fd->inode));
+
+ frame->local = local;
+ }
+
+ return 0;
+}
+
+gf_boolean_t
+pl_is_lk_owner_valid(gf_lkowner_t *owner, client_t *client)
+{
+ if (client && (client->opversion < GD_OP_VERSION_7_0)) {
+ return _gf_true;
+ }
+
+ if (is_lk_owner_null(owner)) {
+ return _gf_false;
+ }
+ return _gf_true;
+}
+
+static int32_t
+pl_inode_from_loc(loc_t *loc, inode_t **pinode)
+{
+ inode_t *inode = NULL;
+ int32_t error = 0;
+
+ if (loc->inode != NULL) {
+ inode = inode_ref(loc->inode);
+ goto done;
+ }
+
+ if (loc->parent == NULL) {
+ error = EINVAL;
+ goto done;
+ }
+
+ if (!gf_uuid_is_null(loc->gfid)) {
+ inode = inode_find(loc->parent->table, loc->gfid);
+ if (inode != NULL) {
+ goto done;
+ }
+ }
+
+ if (loc->name == NULL) {
+ error = EINVAL;
+ goto done;
+ }
+
+ inode = inode_grep(loc->parent->table, loc->parent, loc->name);
+ if (inode == NULL) {
+ /* We haven't found any inode. This means that the file doesn't exist
+ * or that even if it exists, we don't have any knowledge about it, so
+ * we don't have locks on it either, which is fine for our purposes. */
+ goto done;
+ }
+
+done:
+ *pinode = inode;
+
+ return error;
+}
+
+static gf_boolean_t
+pl_inode_has_owners(xlator_t *xl, client_t *client, pl_inode_t *pl_inode,
+ struct timespec *now, struct list_head *contend)
+{
+ pl_dom_list_t *dom;
+ pl_inode_lock_t *lock;
+ gf_boolean_t has_owners = _gf_false;
+
+ list_for_each_entry(dom, &pl_inode->dom_list, inode_list)
+ {
+ list_for_each_entry(lock, &dom->inodelk_list, list)
+ {
+ /* If the lock belongs to the same client, we assume it's related
+ * to the same operation, so we allow the removal to continue. */
+ if (lock->client == client) {
+ continue;
+ }
+ /* If the lock belongs to an internal process, we don't block the
+ * removal. */
+ if (lock->client_pid < 0) {
+ continue;
+ }
+ if (contend == NULL) {
+ return _gf_true;
+ }
+ has_owners = _gf_true;
+ inodelk_contention_notify_check(xl, lock, now, contend);
+ }
+ }
+
+ return has_owners;
+}
+
+int32_t
+pl_inode_remove_prepare(xlator_t *xl, call_frame_t *frame, loc_t *loc,
+ pl_inode_t **ppl_inode, struct list_head *contend)
+{
+ struct timespec now;
+ inode_t *inode;
+ pl_inode_t *pl_inode;
+ int32_t error;
+
+ pl_inode = NULL;
+
+ error = pl_inode_from_loc(loc, &inode);
+ if ((error != 0) || (inode == NULL)) {
+ goto done;
+ }
+
+ pl_inode = pl_inode_get(xl, inode, NULL);
+ if (pl_inode == NULL) {
+ inode_unref(inode);
+ error = ENOMEM;
+ goto done;
+ }
+
+ /* pl_inode_from_loc() already increments ref count for inode, so
+ * here we only assign our reference. */
+ pl_inode->inode = inode;
+
+ timespec_now(&now);
+
+ pthread_mutex_lock(&pl_inode->mutex);
+
+ if (pl_inode->removed) {
+ error = ESTALE;
+ goto unlock;
+ }
+
+ if (pl_inode_has_owners(xl, frame->root->client, pl_inode, &now, contend)) {
+ error = -1;
+ /* We skip the unlock here because the caller must create a stub when
+ * we return -1 and do a call to pl_inode_remove_complete(), which
+ * assumes the lock is still acquired and will release it once
+ * everything else is prepared. */
+ goto done;
+ }
+
+ pl_inode->is_locked = _gf_true;
+ pl_inode->remove_running++;
+
+unlock:
+ pthread_mutex_unlock(&pl_inode->mutex);
+
+done:
+ *ppl_inode = pl_inode;
+
+ return error;
+}
+
+int32_t
+pl_inode_remove_complete(xlator_t *xl, pl_inode_t *pl_inode, call_stub_t *stub,
+ struct list_head *contend)
+{
+ pl_inode_lock_t *lock;
+ int32_t error = -1;
+
+ if (stub != NULL) {
+ list_add_tail(&stub->list, &pl_inode->waiting);
+ pl_inode->is_locked = _gf_true;
+ } else {
+ error = ENOMEM;
+
+ while (!list_empty(contend)) {
+ lock = list_first_entry(contend, pl_inode_lock_t, list);
+ list_del_init(&lock->list);
+ __pl_inodelk_unref(lock);
+ }
+ }
+
+ pthread_mutex_unlock(&pl_inode->mutex);
+
+ if (error < 0) {
+ inodelk_contention_notify(xl, contend);
+ }
+
+ inode_unref(pl_inode->inode);
+
+ return error;
+}
+
+void
+pl_inode_remove_wake(struct list_head *list)
+{
+ call_stub_t *stub;
+
+ while (!list_empty(list)) {
+ stub = list_first_entry(list, call_stub_t, list);
+ list_del_init(&stub->list);
+
+ call_resume(stub);
+ }
+}
+
+void
+pl_inode_remove_cbk(xlator_t *xl, pl_inode_t *pl_inode, int32_t error)
+{
+ struct list_head contend, granted;
+ struct timespec now;
+ pl_dom_list_t *dom;
+
+ if (pl_inode == NULL) {
+ return;
+ }
+
+ INIT_LIST_HEAD(&contend);
+ INIT_LIST_HEAD(&granted);
+ timespec_now(&now);
+
+ pthread_mutex_lock(&pl_inode->mutex);
+
+ if (error == 0) {
+ if (pl_inode->links >= 0) {
+ pl_inode->links--;
+ }
+ if (pl_inode->links == 0) {
+ pl_inode->removed = _gf_true;
+ }
+ }
+
+ pl_inode->remove_running--;
+
+ if ((pl_inode->remove_running == 0) && list_empty(&pl_inode->waiting)) {
+ pl_inode->is_locked = _gf_false;
+
+ list_for_each_entry(dom, &pl_inode->dom_list, inode_list)
+ {
+ __grant_blocked_inode_locks(xl, pl_inode, &granted, dom, &now,
+ &contend);
+ }
+ }
+
+ pthread_mutex_unlock(&pl_inode->mutex);
+
+ unwind_granted_inodes(xl, pl_inode, &granted);
+
+ inodelk_contention_notify(xl, &contend);
+
+ inode_unref(pl_inode->inode);
+}
+
+void
+pl_inode_remove_unlocked(xlator_t *xl, pl_inode_t *pl_inode,
+ struct list_head *list)
+{
+ call_stub_t *stub, *tmp;
+
+ if (!pl_inode->is_locked) {
+ return;
+ }
+
+ list_for_each_entry_safe(stub, tmp, &pl_inode->waiting, list)
+ {
+ if (!pl_inode_has_owners(xl, stub->frame->root->client, pl_inode, NULL,
+ NULL)) {
+ list_move_tail(&stub->list, list);
+ }
+ }
+}
+
+/* This function determines if an inodelk attempt can be done now or it needs
+ * to wait.
+ *
+ * Possible return values:
+ * < 0: An error occurred. Currently only -ESTALE can be returned if the
+ * inode has been deleted previously by unlink/rmdir/rename
+ * = 0: The lock can be attempted.
+ * > 0: The lock needs to wait because a conflicting remove operation is
+ * ongoing.
+ */
+int32_t
+pl_inode_remove_inodelk(pl_inode_t *pl_inode, pl_inode_lock_t *lock)
+{
+ pl_dom_list_t *dom;
+ pl_inode_lock_t *ilock;
+
+ /* If the inode has been deleted, we won't allow any lock. */
+ if (pl_inode->removed) {
+ return -ESTALE;
+ }
+
+ /* We only synchronize with locks made for regular operations coming from
+ * the user. Locks done for internal purposes are hard to control and could
+ * lead to long delays or deadlocks quite easily. */
+ if (lock->client_pid < 0) {
+ return 0;
+ }
+ if (!pl_inode->is_locked) {
+ return 0;
+ }
+ if (pl_inode->remove_running > 0) {
+ return 1;
+ }
+
+ list_for_each_entry(dom, &pl_inode->dom_list, inode_list)
+ {
+ list_for_each_entry(ilock, &dom->inodelk_list, list)
+ {
+ /* If a lock from the same client is already granted, we allow this
+ * one to continue. This is necessary to prevent deadlocks when
+ * multiple locks are taken for the same operation.
+ *
+ * On the other hand, it's unlikely that the same client sends
+ * completely unrelated locks for the same inode.
+ */
+ if (ilock->client == lock->client) {
+ return 0;
+ }
+ }
+ }
+
+ return 1;
+}
diff --git a/xlators/features/locks/src/common.h b/xlators/features/locks/src/common.h
index c3d0e361933..281223bf3b8 100644
--- a/xlators/features/locks/src/common.h
+++ b/xlators/features/locks/src/common.h
@@ -10,7 +10,6 @@
#ifndef __COMMON_H__
#define __COMMON_H__
-#include "lkowner.h"
/*dump locks format strings */
#define RANGE_FMT "type=%s, whence=%hd, start=%llu, len=%llu"
#define ENTRY_FMT "type=%s on basename=%s"
@@ -32,12 +31,34 @@
#define SET_FLOCK_PID(flock, lock) ((flock)->l_pid = lock->client_pid)
+#define PL_STACK_UNWIND_AND_FREE(__local, fop, frame, op_ret, params...) \
+ do { \
+ frame->local = NULL; \
+ STACK_UNWIND_STRICT(fop, frame, op_ret, params); \
+ if (__local) { \
+ if (__local->inodelk_dom_count_req) \
+ data_unref(__local->inodelk_dom_count_req); \
+ loc_wipe(&__local->loc[0]); \
+ loc_wipe(&__local->loc[1]); \
+ if (__local->fd) \
+ fd_unref(__local->fd); \
+ if (__local->inode) \
+ inode_unref(__local->inode); \
+ if (__local->xdata) { \
+ dict_unref(__local->xdata); \
+ __local->xdata = NULL; \
+ } \
+ mem_put(__local); \
+ } \
+ } while (0)
+
posix_lock_t *
new_posix_lock(struct gf_flock *flock, client_t *client, pid_t client_pid,
- gf_lkowner_t *owner, fd_t *fd, uint32_t lk_flags, int can_block);
+ gf_lkowner_t *owner, fd_t *fd, uint32_t lk_flags, int blocking,
+ int32_t *op_errno);
pl_inode_t *
-pl_inode_get(xlator_t *this, inode_t *inode);
+pl_inode_get(xlator_t *this, inode_t *inode, pl_local_t *local);
posix_lock_t *
pl_getlk(pl_inode_t *inode, posix_lock_t *lock);
@@ -45,6 +66,9 @@ pl_getlk(pl_inode_t *inode, posix_lock_t *lock);
int
pl_setlk(xlator_t *this, pl_inode_t *inode, posix_lock_t *lock, int can_block);
+int
+pl_lock_preempt(pl_inode_t *pl_inode, posix_lock_t *reqlock);
+
void
grant_blocked_locks(xlator_t *this, pl_inode_t *inode);
@@ -81,6 +105,15 @@ void
__pl_inodelk_unref(pl_inode_lock_t *lock);
void
+__grant_blocked_inode_locks(xlator_t *this, pl_inode_t *pl_inode,
+ struct list_head *granted, pl_dom_list_t *dom,
+ struct timespec *now, struct list_head *contend);
+
+void
+unwind_granted_inodes(xlator_t *this, pl_inode_t *pl_inode,
+ struct list_head *granted);
+
+void
grant_blocked_entry_locks(xlator_t *this, pl_inode_t *pl_inode,
pl_dom_list_t *dom, struct timespec *now,
struct list_head *contend);
@@ -177,9 +210,53 @@ __pl_entrylk_unref(pl_entry_lock_t *lock);
int
pl_metalock_is_active(pl_inode_t *pl_inode);
-int
-__pl_queue_lock(pl_inode_t *pl_inode, posix_lock_t *reqlock, int can_block);
+void
+__pl_queue_lock(pl_inode_t *pl_inode, posix_lock_t *reqlock);
+
+void
+inodelk_contention_notify_check(xlator_t *xl, pl_inode_lock_t *lock,
+ struct timespec *now,
+ struct list_head *contend);
+
+void
+entrylk_contention_notify_check(xlator_t *xl, pl_entry_lock_t *lock,
+ struct timespec *now,
+ struct list_head *contend);
gf_boolean_t
pl_does_monkey_want_stuck_lock();
+
+gf_boolean_t
+pl_is_mandatory_locking_enabled(pl_inode_t *pl_inode);
+
+void
+pl_clean_local(pl_local_t *local);
+
+int
+pl_local_init(call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd);
+
+gf_boolean_t
+pl_is_lk_owner_valid(gf_lkowner_t *owner, client_t *client);
+
+int32_t
+pl_inode_remove_prepare(xlator_t *xl, call_frame_t *frame, loc_t *loc,
+ pl_inode_t **ppl_inode, struct list_head *contend);
+
+int32_t
+pl_inode_remove_complete(xlator_t *xl, pl_inode_t *pl_inode, call_stub_t *stub,
+ struct list_head *contend);
+
+void
+pl_inode_remove_wake(struct list_head *list);
+
+void
+pl_inode_remove_cbk(xlator_t *xl, pl_inode_t *pl_inode, int32_t error);
+
+void
+pl_inode_remove_unlocked(xlator_t *xl, pl_inode_t *pl_inode,
+ struct list_head *list);
+
+int32_t
+pl_inode_remove_inodelk(pl_inode_t *pl_inode, pl_inode_lock_t *lock);
+
#endif /* __COMMON_H__ */
diff --git a/xlators/features/locks/src/entrylk.c b/xlators/features/locks/src/entrylk.c
index 60e7bed2a44..fd772c850dd 100644
--- a/xlators/features/locks/src/entrylk.c
+++ b/xlators/features/locks/src/entrylk.c
@@ -7,13 +7,13 @@
later), or the GNU General Public License, version 2 (GPLv2), in all
cases as published by the Free Software Foundation.
*/
-#include "glusterfs.h"
-#include "compat.h"
-#include "xlator.h"
-#include "logging.h"
-#include "common-utils.h"
-#include "list.h"
-#include "upcall-utils.h"
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/compat.h>
+#include <glusterfs/xlator.h>
+#include <glusterfs/logging.h>
+#include <glusterfs/common-utils.h>
+#include <glusterfs/list.h>
+#include <glusterfs/upcall-utils.h>
#include "locks.h"
#include "clear.h"
@@ -39,13 +39,20 @@ __pl_entrylk_ref(pl_entry_lock_t *lock)
static pl_entry_lock_t *
new_entrylk_lock(pl_inode_t *pinode, const char *basename, entrylk_type type,
- const char *domain, call_frame_t *frame, char *conn_id)
+ const char *domain, call_frame_t *frame, char *conn_id,
+ int32_t *op_errno)
{
pl_entry_lock_t *newlock = NULL;
+ if (!pl_is_lk_owner_valid(&frame->root->lk_owner, frame->root->client)) {
+ *op_errno = EINVAL;
+ goto out;
+ }
+
newlock = GF_CALLOC(1, sizeof(pl_entry_lock_t),
gf_locks_mt_pl_entry_lock_t);
if (!newlock) {
+ *op_errno = ENOMEM;
goto out;
}
@@ -114,7 +121,6 @@ __stale_entrylk(xlator_t *this, pl_entry_lock_t *candidate_lock,
pl_entry_lock_t *requested_lock, time_t *lock_age_sec)
{
posix_locks_private_t *priv = NULL;
- struct timeval curr;
priv = this->private;
@@ -122,8 +128,7 @@ __stale_entrylk(xlator_t *this, pl_entry_lock_t *candidate_lock,
* chance? Or just the locks we are attempting to acquire?
*/
if (names_conflict(candidate_lock->basename, requested_lock->basename)) {
- gettimeofday(&curr, NULL);
- *lock_age_sec = curr.tv_sec - candidate_lock->granted_time.tv_sec;
+ *lock_age_sec = gf_time() - candidate_lock->granted_time;
if (*lock_age_sec > priv->revocation_secs)
return _gf_true;
}
@@ -197,9 +202,9 @@ out:
return revoke_lock;
}
-static gf_boolean_t
-__entrylk_needs_contention_notify(xlator_t *this, pl_entry_lock_t *lock,
- struct timespec *now)
+void
+entrylk_contention_notify_check(xlator_t *this, pl_entry_lock_t *lock,
+ struct timespec *now, struct list_head *contend)
{
posix_locks_private_t *priv;
int64_t elapsed;
@@ -209,7 +214,7 @@ __entrylk_needs_contention_notify(xlator_t *this, pl_entry_lock_t *lock,
/* If this lock is in a list, it means that we are about to send a
* notification for it, so no need to do anything else. */
if (!list_empty(&lock->contend)) {
- return _gf_false;
+ return;
}
elapsed = now->tv_sec;
@@ -218,7 +223,7 @@ __entrylk_needs_contention_notify(xlator_t *this, pl_entry_lock_t *lock,
elapsed--;
}
if (elapsed < priv->notify_contention_delay) {
- return _gf_false;
+ return;
}
/* All contention notifications will be sent outside of the locked
@@ -231,7 +236,7 @@ __entrylk_needs_contention_notify(xlator_t *this, pl_entry_lock_t *lock,
lock->contention_time = *now;
- return _gf_true;
+ list_add_tail(&lock->contend, contend);
}
void
@@ -325,9 +330,7 @@ __entrylk_grantable(xlator_t *this, pl_dom_list_t *dom, pl_entry_lock_t *lock,
break;
}
}
- if (__entrylk_needs_contention_notify(this, tmp, now)) {
- list_add_tail(&tmp->contend, contend);
- }
+ entrylk_contention_notify_check(this, tmp, now, contend);
}
}
@@ -539,19 +542,17 @@ static int
__lock_blocked_add(xlator_t *this, pl_inode_t *pinode, pl_dom_list_t *dom,
pl_entry_lock_t *lock, int nonblock)
{
- struct timeval now;
-
if (nonblock)
goto out;
- gettimeofday(&now, NULL);
-
- lock->blkd_time = now;
+ lock->blkd_time = gf_time();
list_add_tail(&lock->blocked_locks, &dom->blocked_entrylks);
gf_msg_trace(this->name, 0, "Blocking lock: {pinode=%p, basename=%s}",
pinode, lock->basename);
+ entrylk_trace_block(this, lock->frame, NULL, NULL, NULL, lock->basename,
+ ENTRYLK_LOCK, lock->type);
out:
return -EAGAIN;
}
@@ -605,7 +606,7 @@ __lock_entrylk(xlator_t *this, pl_inode_t *pinode, pl_entry_lock_t *lock,
}
__pl_entrylk_ref(lock);
- gettimeofday(&lock->granted_time, NULL);
+ lock->granted_time = gf_time();
list_add(&lock->domain_list, &dom->entrylk_list);
ret = 0;
@@ -644,11 +645,10 @@ int32_t
check_entrylk_on_basename(xlator_t *this, inode_t *parent, char *basename)
{
int32_t entrylk = 0;
- pl_inode_t *pinode = 0;
pl_dom_list_t *dom = NULL;
pl_entry_lock_t *conf = NULL;
- pinode = pl_inode_get(this, parent);
+ pl_inode_t *pinode = pl_inode_get(this, parent, NULL);
if (!pinode)
goto out;
pthread_mutex_lock(&pinode->mutex);
@@ -689,10 +689,9 @@ __grant_blocked_entry_locks(xlator_t *this, pl_inode_t *pl_inode,
bl_ret = __lock_entrylk(bl->this, pl_inode, bl, 0, dom, now, contend);
if (bl_ret == 0) {
- list_add(&bl->blocked_locks, granted);
+ list_add_tail(&bl->blocked_locks, granted);
}
}
- return;
}
/* Grants locks if possible which are blocked on a lock */
@@ -770,7 +769,7 @@ pl_common_entrylk(call_frame_t *frame, xlator_t *this, const char *volume,
if (xdata)
dict_ret = dict_get_str(xdata, "connection-id", &conn_id);
- pinode = pl_inode_get(this, inode);
+ pinode = pl_inode_get(this, inode, NULL);
if (!pinode) {
op_errno = ENOMEM;
goto out;
@@ -794,10 +793,9 @@ pl_common_entrylk(call_frame_t *frame, xlator_t *this, const char *volume,
entrylk_trace_in(this, frame, volume, fd, loc, basename, cmd, type);
reqlock = new_entrylk_lock(pinode, basename, type, dom->domain, frame,
- conn_id);
+ conn_id, &op_errno);
if (!reqlock) {
op_ret = -1;
- op_errno = ENOMEM;
goto unwind;
}
@@ -933,8 +931,6 @@ out:
op_ret, op_errno);
unwind:
STACK_UNWIND_STRICT(entrylk, frame, op_ret, op_errno, NULL);
- } else {
- entrylk_trace_block(this, frame, volume, fd, loc, basename, cmd, type);
}
if (pcontend != NULL) {
@@ -1072,32 +1068,36 @@ pl_entrylk_client_cleanup(xlator_t *this, pl_ctx_t *ctx)
}
pthread_mutex_unlock(&ctx->lock);
- list_for_each_entry_safe(l, tmp, &unwind, client_list)
- {
- list_del_init(&l->client_list);
+ if (!list_empty(&unwind)) {
+ list_for_each_entry_safe(l, tmp, &unwind, client_list)
+ {
+ list_del_init(&l->client_list);
- if (l->frame)
- STACK_UNWIND_STRICT(entrylk, l->frame, -1, EAGAIN, NULL);
- list_add_tail(&l->client_list, &released);
+ if (l->frame)
+ STACK_UNWIND_STRICT(entrylk, l->frame, -1, EAGAIN, NULL);
+ list_add_tail(&l->client_list, &released);
+ }
}
- list_for_each_entry_safe(l, tmp, &released, client_list)
- {
- list_del_init(&l->client_list);
+ if (!list_empty(&released)) {
+ list_for_each_entry_safe(l, tmp, &released, client_list)
+ {
+ list_del_init(&l->client_list);
- pinode = l->pinode;
+ pinode = l->pinode;
- dom = get_domain(pinode, l->volume);
+ dom = get_domain(pinode, l->volume);
- grant_blocked_entry_locks(this, pinode, dom, &now, pcontend);
+ grant_blocked_entry_locks(this, pinode, dom, &now, pcontend);
- pthread_mutex_lock(&pinode->mutex);
- {
- __pl_entrylk_unref(l);
- }
- pthread_mutex_unlock(&pinode->mutex);
+ pthread_mutex_lock(&pinode->mutex);
+ {
+ __pl_entrylk_unref(l);
+ }
+ pthread_mutex_unlock(&pinode->mutex);
- inode_unref(pinode->inode);
+ inode_unref(pinode->inode);
+ }
}
if (pcontend != NULL) {
diff --git a/xlators/features/locks/src/inodelk.c b/xlators/features/locks/src/inodelk.c
index b8e005038d8..d4e51d6e0a1 100644
--- a/xlators/features/locks/src/inodelk.c
+++ b/xlators/features/locks/src/inodelk.c
@@ -7,18 +7,16 @@
later), or the GNU General Public License, version 2 (GPLv2), in all
cases as published by the Free Software Foundation.
*/
-#include "glusterfs.h"
-#include "compat.h"
-#include "xlator.h"
-#include "logging.h"
-#include "common-utils.h"
-#include "list.h"
-#include "upcall-utils.h"
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/compat.h>
+#include <glusterfs/dict.h>
+#include <glusterfs/logging.h>
+#include <glusterfs/list.h>
+#include <glusterfs/upcall-utils.h>
#include "locks.h"
#include "clear.h"
#include "common.h"
-#include "pl-messages.h"
void
__delete_inode_lock(pl_inode_lock_t *lock)
@@ -142,15 +140,13 @@ __stale_inodelk(xlator_t *this, pl_inode_lock_t *candidate_lock,
pl_inode_lock_t *requested_lock, time_t *lock_age_sec)
{
posix_locks_private_t *priv = NULL;
- struct timeval curr;
priv = this->private;
/* Question: Should we just prune them all given the
* chance? Or just the locks we are attempting to acquire?
*/
if (inodelk_conflict(candidate_lock, requested_lock)) {
- gettimeofday(&curr, NULL);
- *lock_age_sec = curr.tv_sec - candidate_lock->granted_time.tv_sec;
+ *lock_age_sec = gf_time() - candidate_lock->granted_time;
if (*lock_age_sec > priv->revocation_secs)
return _gf_true;
}
@@ -231,9 +227,9 @@ out:
return revoke_lock;
}
-static gf_boolean_t
-__inodelk_needs_contention_notify(xlator_t *this, pl_inode_lock_t *lock,
- struct timespec *now)
+void
+inodelk_contention_notify_check(xlator_t *this, pl_inode_lock_t *lock,
+ struct timespec *now, struct list_head *contend)
{
posix_locks_private_t *priv;
int64_t elapsed;
@@ -243,7 +239,7 @@ __inodelk_needs_contention_notify(xlator_t *this, pl_inode_lock_t *lock,
/* If this lock is in a list, it means that we are about to send a
* notification for it, so no need to do anything else. */
if (!list_empty(&lock->contend)) {
- return _gf_false;
+ return;
}
elapsed = now->tv_sec;
@@ -252,7 +248,7 @@ __inodelk_needs_contention_notify(xlator_t *this, pl_inode_lock_t *lock,
elapsed--;
}
if (elapsed < priv->notify_contention_delay) {
- return _gf_false;
+ return;
}
/* All contention notifications will be sent outside of the locked
@@ -265,7 +261,7 @@ __inodelk_needs_contention_notify(xlator_t *this, pl_inode_lock_t *lock,
lock->contention_time = *now;
- return _gf_true;
+ list_add_tail(&lock->contend, contend);
}
void
@@ -353,9 +349,7 @@ __inodelk_grantable(xlator_t *this, pl_dom_list_t *dom, pl_inode_lock_t *lock,
break;
}
}
- if (__inodelk_needs_contention_notify(this, l, now)) {
- list_add_tail(&l->contend, contend);
- }
+ inodelk_contention_notify_check(this, l, now, contend);
}
}
@@ -401,15 +395,11 @@ static int
__lock_blocked_add(xlator_t *this, pl_dom_list_t *dom, pl_inode_lock_t *lock,
int can_block)
{
- struct timeval now;
-
if (can_block == 0) {
goto out;
}
- gettimeofday(&now, NULL);
-
- lock->blkd_time = now;
+ lock->blkd_time = gf_time();
list_add_tail(&lock->blocked_locks, &dom->blocked_inodelks);
gf_msg_trace(this->name, 0,
@@ -420,6 +410,8 @@ __lock_blocked_add(xlator_t *this, pl_dom_list_t *dom, pl_inode_lock_t *lock,
lkowner_utoa(&lock->owner), lock->user_flock.l_start,
lock->user_flock.l_len);
+ pl_trace_block(this, lock->frame, NULL, NULL, F_SETLKW, &lock->user_flock,
+ lock->volume);
out:
return -EAGAIN;
}
@@ -433,12 +425,17 @@ __lock_inodelk(xlator_t *this, pl_inode_t *pl_inode, pl_inode_lock_t *lock,
struct list_head *contend)
{
pl_inode_lock_t *conf = NULL;
- int ret = -EINVAL;
+ int ret;
- conf = __inodelk_grantable(this, dom, lock, now, contend);
- if (conf) {
- ret = __lock_blocked_add(this, dom, lock, can_block);
- goto out;
+ ret = pl_inode_remove_inodelk(pl_inode, lock);
+ if (ret < 0) {
+ return ret;
+ }
+ if (ret == 0) {
+ conf = __inodelk_grantable(this, dom, lock, now, contend);
+ }
+ if ((ret > 0) || (conf != NULL)) {
+ return __lock_blocked_add(this, dom, lock, can_block);
}
/* To prevent blocked locks starvation, check if there are any blocked
@@ -460,17 +457,13 @@ __lock_inodelk(xlator_t *this, pl_inode_t *pl_inode, pl_inode_lock_t *lock,
"starvation");
}
- ret = __lock_blocked_add(this, dom, lock, can_block);
- goto out;
+ return __lock_blocked_add(this, dom, lock, can_block);
}
__pl_inodelk_ref(lock);
- gettimeofday(&lock->granted_time, NULL);
+ lock->granted_time = gf_time();
list_add(&lock->list, &dom->inodelk_list);
- ret = 0;
-
-out:
- return ret;
+ return 0;
}
/* Return true if the two inodelks have exactly same lock boundaries */
@@ -502,33 +495,36 @@ static pl_inode_lock_t *
__inode_unlock_lock(xlator_t *this, pl_inode_lock_t *lock, pl_dom_list_t *dom)
{
pl_inode_lock_t *conf = NULL;
+ inode_t *inode = NULL;
+
+ inode = lock->pl_inode->inode;
conf = find_matching_inodelk(lock, dom);
if (!conf) {
gf_log(this->name, GF_LOG_ERROR,
" Matching lock not found for unlock %llu-%llu, by %s "
- "on %p",
+ "on %p for gfid:%s",
(unsigned long long)lock->fl_start,
(unsigned long long)lock->fl_end, lkowner_utoa(&lock->owner),
- lock->client);
+ lock->client, inode ? uuid_utoa(inode->gfid) : "UNKNOWN");
goto out;
}
__delete_inode_lock(conf);
gf_log(this->name, GF_LOG_DEBUG,
- " Matching lock found for unlock %llu-%llu, by %s on %p",
+ " Matching lock found for unlock %llu-%llu, by %s on %p for gfid:%s",
(unsigned long long)lock->fl_start, (unsigned long long)lock->fl_end,
- lkowner_utoa(&lock->owner), lock->client);
+ lkowner_utoa(&lock->owner), lock->client,
+ inode ? uuid_utoa(inode->gfid) : "UNKNOWN");
out:
return conf;
}
-static void
+void
__grant_blocked_inode_locks(xlator_t *this, pl_inode_t *pl_inode,
struct list_head *granted, pl_dom_list_t *dom,
struct timespec *now, struct list_head *contend)
{
- int bl_ret = 0;
pl_inode_lock_t *bl = NULL;
pl_inode_lock_t *tmp = NULL;
@@ -541,52 +537,48 @@ __grant_blocked_inode_locks(xlator_t *this, pl_inode_t *pl_inode,
{
list_del_init(&bl->blocked_locks);
- bl_ret = __lock_inodelk(this, pl_inode, bl, 1, dom, now, contend);
+ bl->status = __lock_inodelk(this, pl_inode, bl, 1, dom, now, contend);
- if (bl_ret == 0) {
- list_add(&bl->blocked_locks, granted);
+ if (bl->status != -EAGAIN) {
+ list_add_tail(&bl->blocked_locks, granted);
}
}
- return;
}
-/* Grant all inodelks blocked on a lock */
void
-grant_blocked_inode_locks(xlator_t *this, pl_inode_t *pl_inode,
- pl_dom_list_t *dom, struct timespec *now,
- struct list_head *contend)
+unwind_granted_inodes(xlator_t *this, pl_inode_t *pl_inode,
+ struct list_head *granted)
{
- struct list_head granted;
pl_inode_lock_t *lock;
pl_inode_lock_t *tmp;
+ int32_t op_ret;
+ int32_t op_errno;
- INIT_LIST_HEAD(&granted);
-
- pthread_mutex_lock(&pl_inode->mutex);
+ list_for_each_entry_safe(lock, tmp, granted, blocked_locks)
{
- __grant_blocked_inode_locks(this, pl_inode, &granted, dom, now,
- contend);
- }
- pthread_mutex_unlock(&pl_inode->mutex);
-
- list_for_each_entry_safe(lock, tmp, &granted, blocked_locks)
- {
- gf_log(this->name, GF_LOG_TRACE,
- "%s (pid=%d) (lk-owner=%s) %" PRId64 " - %" PRId64 " => Granted",
- lock->fl_type == F_UNLCK ? "Unlock" : "Lock", lock->client_pid,
- lkowner_utoa(&lock->owner), lock->user_flock.l_start,
- lock->user_flock.l_len);
-
+ if (lock->status == 0) {
+ op_ret = 0;
+ op_errno = 0;
+ gf_log(this->name, GF_LOG_TRACE,
+ "%s (pid=%d) (lk-owner=%s) %" PRId64 " - %" PRId64
+ " => Granted",
+ lock->fl_type == F_UNLCK ? "Unlock" : "Lock",
+ lock->client_pid, lkowner_utoa(&lock->owner),
+ lock->user_flock.l_start, lock->user_flock.l_len);
+ } else {
+ op_ret = -1;
+ op_errno = -lock->status;
+ }
pl_trace_out(this, lock->frame, NULL, NULL, F_SETLKW, &lock->user_flock,
- 0, 0, lock->volume);
+ op_ret, op_errno, lock->volume);
- STACK_UNWIND_STRICT(inodelk, lock->frame, 0, 0, NULL);
+ STACK_UNWIND_STRICT(inodelk, lock->frame, op_ret, op_errno, NULL);
lock->frame = NULL;
}
pthread_mutex_lock(&pl_inode->mutex);
{
- list_for_each_entry_safe(lock, tmp, &granted, blocked_locks)
+ list_for_each_entry_safe(lock, tmp, granted, blocked_locks)
{
list_del_init(&lock->blocked_locks);
__pl_inodelk_unref(lock);
@@ -595,6 +587,26 @@ grant_blocked_inode_locks(xlator_t *this, pl_inode_t *pl_inode,
pthread_mutex_unlock(&pl_inode->mutex);
}
+/* Grant all inodelks blocked on a lock */
+void
+grant_blocked_inode_locks(xlator_t *this, pl_inode_t *pl_inode,
+ pl_dom_list_t *dom, struct timespec *now,
+ struct list_head *contend)
+{
+ struct list_head granted;
+
+ INIT_LIST_HEAD(&granted);
+
+ pthread_mutex_lock(&pl_inode->mutex);
+ {
+ __grant_blocked_inode_locks(this, pl_inode, &granted, dom, now,
+ contend);
+ }
+ pthread_mutex_unlock(&pl_inode->mutex);
+
+ unwind_granted_inodes(this, pl_inode, &granted);
+}
+
static void
pl_inodelk_log_cleanup(pl_inode_lock_t *lock)
{
@@ -656,7 +668,7 @@ pl_inodelk_client_cleanup(xlator_t *this, pl_ctx_t *ctx)
* and blocked lists, then this means that a parallel
* unlock on another inodelk (L2 say) may have 'granted'
* L1 and added it to 'granted' list in
- * __grant_blocked_node_locks() (although using the
+ * __grant_blocked_inode_locks() (although using the
* 'blocked_locks' member). In that case, the cleanup
* codepath must try and grant other overlapping
* blocked inodelks from other clients, now that L1 is
@@ -691,31 +703,35 @@ pl_inodelk_client_cleanup(xlator_t *this, pl_ctx_t *ctx)
}
pthread_mutex_unlock(&ctx->lock);
- list_for_each_entry_safe(l, tmp, &unwind, client_list)
- {
- list_del_init(&l->client_list);
+ if (!list_empty(&unwind)) {
+ list_for_each_entry_safe(l, tmp, &unwind, client_list)
+ {
+ list_del_init(&l->client_list);
- if (l->frame)
- STACK_UNWIND_STRICT(inodelk, l->frame, -1, EAGAIN, NULL);
- list_add_tail(&l->client_list, &released);
+ if (l->frame)
+ STACK_UNWIND_STRICT(inodelk, l->frame, -1, EAGAIN, NULL);
+ list_add_tail(&l->client_list, &released);
+ }
}
- list_for_each_entry_safe(l, tmp, &released, client_list)
- {
- list_del_init(&l->client_list);
+ if (!list_empty(&released)) {
+ list_for_each_entry_safe(l, tmp, &released, client_list)
+ {
+ list_del_init(&l->client_list);
- pl_inode = l->pl_inode;
+ pl_inode = l->pl_inode;
- dom = get_domain(pl_inode, l->volume);
+ dom = get_domain(pl_inode, l->volume);
- grant_blocked_inode_locks(this, pl_inode, dom, &now, pcontend);
+ grant_blocked_inode_locks(this, pl_inode, dom, &now, pcontend);
- pthread_mutex_lock(&pl_inode->mutex);
- {
- __pl_inodelk_unref(l);
+ pthread_mutex_lock(&pl_inode->mutex);
+ {
+ __pl_inodelk_unref(l);
+ }
+ pthread_mutex_unlock(&pl_inode->mutex);
+ inode_unref(pl_inode->inode);
}
- pthread_mutex_unlock(&pl_inode->mutex);
- inode_unref(pl_inode->inode);
}
if (pcontend != NULL) {
@@ -737,6 +753,7 @@ pl_inode_setlk(xlator_t *this, pl_ctx_t *ctx, pl_inode_t *pl_inode,
gf_boolean_t need_inode_unref = _gf_false;
struct list_head *pcontend = NULL;
struct list_head contend;
+ struct list_head wake;
struct timespec now = {};
short fl_type;
@@ -788,6 +805,8 @@ pl_inode_setlk(xlator_t *this, pl_ctx_t *ctx, pl_inode_t *pl_inode,
timespec_now(&now);
}
+ INIT_LIST_HEAD(&wake);
+
if (ctx)
pthread_mutex_lock(&ctx->lock);
pthread_mutex_lock(&pl_inode->mutex);
@@ -810,18 +829,17 @@ pl_inode_setlk(xlator_t *this, pl_ctx_t *ctx, pl_inode_t *pl_inode,
lock->fl_type == F_UNLCK ? "Unlock" : "Lock",
lock->client_pid, lkowner_utoa(&lock->owner),
lock->user_flock.l_start, lock->user_flock.l_len);
- if (can_block)
+ if (can_block) {
unref = _gf_false;
- /* For all but the case where a non-blocking
- * lock attempt fails, the extra ref taken at
- * the start of this function must be negated.
- */
- else
- need_inode_unref = _gf_true;
+ }
}
-
- if (ctx && (!ret || can_block))
+ /* For all but the case where a non-blocking lock attempt fails
+ * with -EAGAIN, the extra ref taken at the start of this function
+ * must be negated. */
+ need_inode_unref = (ret != 0) && ((ret != -EAGAIN) || !can_block);
+ if (ctx && !need_inode_unref) {
list_add_tail(&lock->client_list, &ctx->inodelk_lockers);
+ }
} else {
/* Irrespective of whether unlock succeeds or not,
* the extra inode ref that was done at the start of
@@ -839,6 +857,8 @@ pl_inode_setlk(xlator_t *this, pl_ctx_t *ctx, pl_inode_t *pl_inode,
list_del_init(&retlock->client_list);
__pl_inodelk_unref(retlock);
+ pl_inode_remove_unlocked(this, pl_inode, &wake);
+
ret = 0;
}
out:
@@ -849,6 +869,8 @@ pl_inode_setlk(xlator_t *this, pl_ctx_t *ctx, pl_inode_t *pl_inode,
if (ctx)
pthread_mutex_unlock(&ctx->lock);
+ pl_inode_remove_wake(&wake);
+
/* The following (extra) unref corresponds to the ref that
* was done at the time the lock was granted.
*/
@@ -869,17 +891,23 @@ pl_inode_setlk(xlator_t *this, pl_ctx_t *ctx, pl_inode_t *pl_inode,
}
/* Create a new inode_lock_t */
-pl_inode_lock_t *
+static pl_inode_lock_t *
new_inode_lock(struct gf_flock *flock, client_t *client, pid_t client_pid,
call_frame_t *frame, xlator_t *this, const char *volume,
- char *conn_id)
+ char *conn_id, int32_t *op_errno)
{
pl_inode_lock_t *lock = NULL;
+ if (!pl_is_lk_owner_valid(&frame->root->lk_owner, frame->root->client)) {
+ *op_errno = EINVAL;
+ goto out;
+ }
+
lock = GF_CALLOC(1, sizeof(*lock), gf_locks_mt_pl_inode_lock_t);
if (!lock) {
- return NULL;
+ *op_errno = ENOMEM;
+ goto out;
}
lock->fl_start = flock->l_start;
@@ -907,6 +935,7 @@ new_inode_lock(struct gf_flock *flock, client_t *client, pid_t client_pid,
INIT_LIST_HEAD(&lock->contend);
__pl_inodelk_ref(lock);
+out:
return lock;
}
@@ -951,6 +980,7 @@ pl_common_inodelk(call_frame_t *frame, xlator_t *this, const char *volume,
int ret = -1;
GF_UNUSED int dict_ret = -1;
int can_block = 0;
+ short lock_type = 0;
pl_inode_t *pinode = NULL;
pl_inode_lock_t *reqlock = NULL;
pl_dom_list_t *dom = NULL;
@@ -988,7 +1018,7 @@ pl_common_inodelk(call_frame_t *frame, xlator_t *this, const char *volume,
}
}
- pinode = pl_inode_get(this, inode);
+ pinode = pl_inode_get(this, inode, NULL);
if (!pinode) {
op_errno = ENOMEM;
goto unwind;
@@ -1001,11 +1031,10 @@ pl_common_inodelk(call_frame_t *frame, xlator_t *this, const char *volume,
}
reqlock = new_inode_lock(flock, frame->root->client, frame->root->pid,
- frame, this, dom->domain, conn_id);
+ frame, this, dom->domain, conn_id, &op_errno);
if (!reqlock) {
op_ret = -1;
- op_errno = ENOMEM;
goto unwind;
}
@@ -1016,16 +1045,20 @@ pl_common_inodelk(call_frame_t *frame, xlator_t *this, const char *volume,
/* fall through */
case F_SETLK:
+ lock_type = flock->l_type;
memcpy(&reqlock->user_flock, flock, sizeof(struct gf_flock));
ret = pl_inode_setlk(this, ctx, pinode, reqlock, can_block, dom,
inode);
if (ret < 0) {
- if ((can_block) && (F_UNLCK != flock->l_type)) {
- pl_trace_block(this, frame, fd, loc, cmd, flock, volume);
- goto out;
+ if (ret == -EAGAIN) {
+ if (can_block && (F_UNLCK != lock_type)) {
+ goto out;
+ }
+ gf_log(this->name, GF_LOG_TRACE, "returning EAGAIN");
+ } else {
+ gf_log(this->name, GF_LOG_TRACE, "returning %d", ret);
}
- gf_log(this->name, GF_LOG_TRACE, "returning EAGAIN");
op_errno = -ret;
goto unwind;
}
diff --git a/xlators/features/locks/src/locks-mem-types.h b/xlators/features/locks/src/locks-mem-types.h
index 240c1957a42..a76605027b3 100644
--- a/xlators/features/locks/src/locks-mem-types.h
+++ b/xlators/features/locks/src/locks-mem-types.h
@@ -11,7 +11,7 @@
#ifndef __LOCKS_MEM_TYPES_H__
#define __LOCKS_MEM_TYPES_H__
-#include "mem-types.h"
+#include <glusterfs/mem-types.h>
enum gf_locks_mem_types_ {
gf_locks_mt_pl_dom_list_t = gf_common_mt_end + 1,
@@ -19,7 +19,6 @@ enum gf_locks_mem_types_ {
gf_locks_mt_posix_lock_t,
gf_locks_mt_pl_entry_lock_t,
gf_locks_mt_pl_inode_lock_t,
- gf_locks_mt_truncate_ops,
gf_locks_mt_pl_rw_req_t,
gf_locks_mt_posix_locks_private_t,
gf_locks_mt_pl_fdctx_t,
diff --git a/xlators/features/locks/src/locks.h b/xlators/features/locks/src/locks.h
index cf2849fc251..c868eb494a2 100644
--- a/xlators/features/locks/src/locks.h
+++ b/xlators/features/locks/src/locks.h
@@ -10,13 +10,13 @@
#ifndef __POSIX_LOCKS_H__
#define __POSIX_LOCKS_H__
-#include "compat-errno.h"
-#include "stack.h"
-#include "call-stub.h"
+#include <glusterfs/compat-errno.h>
+#include <glusterfs/stack.h>
+#include <glusterfs/call-stub.h>
#include "locks-mem-types.h"
-#include "client_t.h"
+#include <glusterfs/client_t.h>
-#include "lkowner.h"
+#include <glusterfs/lkowner.h>
typedef enum {
MLK_NONE,
@@ -30,11 +30,11 @@ struct __pl_fd;
struct __posix_lock {
struct list_head list;
- short fl_type;
off_t fl_start;
off_t fl_end;
uint32_t lk_flags;
+ short fl_type;
short blocked; /* waiting to acquire */
struct gf_flock user_flock; /* the flock supplied by the user */
xlator_t *this; /* required for blocked locks */
@@ -43,9 +43,8 @@ struct __posix_lock {
fd_t *fd;
call_frame_t *frame;
- struct timeval blkd_time; /*time at which lock was queued into blkd list*/
- struct timeval
- granted_time; /*time at which lock was queued into active list*/
+ time_t blkd_time; /* time at which lock was queued into blkd list */
+ time_t granted_time; /* time at which lock was queued into active list */
/* These two together serve to uniquely identify each process
across nodes */
@@ -74,7 +73,6 @@ struct __pl_inode_lock {
struct list_head contend; /* list of contending locks */
int ref;
- short fl_type;
off_t fl_start;
off_t fl_end;
@@ -86,9 +84,9 @@ struct __pl_inode_lock {
call_frame_t *frame;
- struct timeval blkd_time; /*time at which lock was queued into blkd list*/
- struct timeval
- granted_time; /*time at which lock was queued into active list*/
+ time_t blkd_time; /* time at which lock was queued into blkd list */
+ time_t granted_time; /* time at which lock was queued into active list */
+
/*last time at which lock contention was detected and notified*/
struct timespec contention_time;
@@ -102,6 +100,10 @@ struct __pl_inode_lock {
char *connection_id; /* stores the client connection id */
struct list_head client_list; /* list of all locks from a client */
+ short fl_type;
+
+ int32_t status; /* Error code when we try to grant a lock in blocked
+ state */
};
typedef struct __pl_inode_lock pl_inode_lock_t;
@@ -135,11 +137,10 @@ struct __entry_lock {
const char *volume;
const char *basename;
- entrylk_type type;
- struct timeval blkd_time; /*time at which lock was queued into blkd list*/
- struct timeval
- granted_time; /*time at which lock was queued into active list*/
+ time_t blkd_time; /* time at which lock was queued into blkd list */
+ time_t granted_time; /* time at which lock was queued into active list */
+
/*last time at which lock contention was detected and notified*/
struct timespec contention_time;
@@ -150,6 +151,7 @@ struct __entry_lock {
char *connection_id; /* stores the client connection id */
struct list_head client_list; /* list of all locks from a client */
+ entrylk_type type;
};
typedef struct __entry_lock pl_entry_lock_t;
@@ -164,13 +166,14 @@ struct __pl_inode {
struct list_head rw_list; /* list of waiting r/w requests */
struct list_head reservelk_list; /* list of reservelks */
struct list_head blocked_reservelks; /* list of blocked reservelks */
- struct list_head
- blocked_calls; /* List of blocked lock calls while a reserve is held*/
- struct list_head metalk_list; /* Meta lock list */
- /* This is to store the incoming lock
- requests while meta lock is enabled */
- struct list_head queued_locks;
- int mandatory; /* if mandatory locking is enabled */
+ struct list_head blocked_calls; /* List of blocked lock calls while a
+ reserve is held*/
+ struct list_head metalk_list; /* Meta lock list */
+ struct list_head queued_locks; /* This is to store the incoming lock
+ requests while meta lock is enabled */
+ struct list_head waiting; /* List of pending fops waiting to unlink/rmdir
+ the inode. */
+ int mandatory; /* if mandatory locking is enabled */
inode_t *refkeeper; /* hold refs on an inode while locks are
held to prevent pruning */
@@ -179,6 +182,31 @@ struct __pl_inode {
of inode_t as long as there are
locks on it */
gf_boolean_t migrated;
+
+ /* Flag to indicate whether to read mlock-enforce xattr from disk */
+ gf_boolean_t check_mlock_info;
+
+ /* Mandatory_lock enforce: IO will be allowed if and only if the lkowner has
+ held the lock.
+
+ Note: An xattr is set on the file to recover this information post
+ reboot. If client does not want mandatory lock to be enforced, then it
+ should remove this xattr explicitly
+ */
+ gf_boolean_t mlock_enforced;
+ /* There are scenarios where mandatory lock is granted but there are IOs
+ pending at posix level. To avoid this, before preempting the previous
+ lock owner we wait for all the fops to be unwound.
+ */
+ int fop_wind_count;
+ pthread_cond_t check_fop_wind_count;
+
+ gf_boolean_t track_fop_wind_count;
+
+ int32_t links; /* Number of hard links the inode has. */
+ uint32_t remove_running; /* Number of remove operations running. */
+ gf_boolean_t is_locked; /* Regular locks will be blocked. */
+ gf_boolean_t removed; /* The inode has been deleted. */
};
typedef struct __pl_inode pl_inode_t;
@@ -196,29 +224,33 @@ struct __pl_metalk {
typedef struct __pl_metalk pl_meta_lock_t;
typedef struct {
+ char *brickname;
+ uint32_t revocation_secs;
+ uint32_t revocation_max_blocked;
+ uint32_t notify_contention_delay;
mlk_mode_t mandatory_mode; /* holds current mandatory locking mode */
gf_boolean_t trace; /* trace lock requests in and out */
- char *brickname;
gf_boolean_t monkey_unlocking;
- uint32_t revocation_secs;
gf_boolean_t revocation_clear_all;
- uint32_t revocation_max_blocked;
gf_boolean_t notify_contention;
- uint32_t notify_contention_delay;
+ gf_boolean_t mlock_enforced;
} posix_locks_private_t;
typedef struct {
- gf_boolean_t entrylk_count_req;
- gf_boolean_t inodelk_count_req;
- gf_boolean_t posixlk_count_req;
- gf_boolean_t parent_entrylk_req;
data_t *inodelk_dom_count_req;
dict_t *xdata;
loc_t loc[2];
fd_t *fd;
+ inode_t *inode;
off_t offset;
glusterfs_fop_t op;
+ gf_boolean_t entrylk_count_req;
+ gf_boolean_t inodelk_count_req;
+ gf_boolean_t posixlk_count_req;
+ gf_boolean_t parent_entrylk_req;
+ gf_boolean_t multiple_dom_lk_requests;
+ int update_mlock_enforced_flag;
} pl_local_t;
typedef struct {
@@ -239,6 +271,15 @@ typedef struct _locks_ctx {
struct list_head metalk_list;
} pl_ctx_t;
+typedef struct _multi_dom_lk_data {
+ xlator_t *this;
+ inode_t *inode;
+ dict_t *xdata_rsp;
+ gf_boolean_t keep_max;
+} multi_dom_lk_data;
+
+typedef enum { DECREMENT, INCREMENT } pl_count_op_t;
+
pl_ctx_t *
pl_ctx_get(client_t *client, xlator_t *xlator);
diff --git a/xlators/features/locks/src/pl-messages.h b/xlators/features/locks/src/pl-messages.h
index a99e1bbce43..e2d3d7ca974 100644
--- a/xlators/features/locks/src/pl-messages.h
+++ b/xlators/features/locks/src/pl-messages.h
@@ -11,7 +11,7 @@
#ifndef _PL_MESSAGES_H_
#define _PL_MESSAGES_H_
-#include "glfs-message-id.h"
+#include <glusterfs/glfs-message-id.h>
/* To add new message IDs, append new identifiers at the end of the list.
*
diff --git a/xlators/features/locks/src/posix.c b/xlators/features/locks/src/posix.c
index af162c5284c..cf0ae4c57dd 100644
--- a/xlators/features/locks/src/posix.c
+++ b/xlators/features/locks/src/posix.c
@@ -12,19 +12,15 @@
#include <limits.h>
#include <pthread.h>
-#include "glusterfs.h"
-#include "compat.h"
-#include "xlator.h"
-#include "logging.h"
-#include "common-utils.h"
+#include <glusterfs/compat.h>
+#include <glusterfs/logging.h>
#include "locks.h"
#include "common.h"
-#include "statedump.h"
+#include <glusterfs/statedump.h>
#include "clear.h"
-#include "defaults.h"
-#include "syncop.h"
-#include "pl-messages.h"
+#include <glusterfs/defaults.h>
+#include <glusterfs/syncop.h>
#ifndef LLONG_MAX
#define LLONG_MAX LONG_LONG_MAX /* compat with old gcc */
@@ -43,21 +39,6 @@ pl_lockinfo_get_brickname(xlator_t *, inode_t *, int32_t *);
static int
fetch_pathinfo(xlator_t *, inode_t *, int32_t *, char **);
-#define PL_STACK_UNWIND_AND_FREE(__local, fop, frame, op_ret, params...) \
- do { \
- frame->local = NULL; \
- STACK_UNWIND_STRICT(fop, frame, op_ret, params); \
- if (__local) { \
- if (__local->inodelk_dom_count_req) \
- data_unref(__local->inodelk_dom_count_req); \
- loc_wipe(&__local->loc[0]); \
- loc_wipe(&__local->loc[1]); \
- if (__local->fd) \
- fd_unref(__local->fd); \
- mem_put(__local); \
- } \
- } while (0)
-
/*
* The client is always requesting data, but older
* servers were not returning it. Newer ones are, so
@@ -115,69 +96,156 @@ fetch_pathinfo(xlator_t *, inode_t *, int32_t *, char **);
#define PL_LOCAL_GET_REQUESTS(frame, this, xdata, __fd, __loc, __newloc) \
do { \
if (pl_has_xdata_requests(xdata)) { \
- frame->local = mem_get0(this->local_pool); \
+ if (!frame->local) \
+ frame->local = mem_get0(this->local_pool); \
pl_local_t *__local = frame->local; \
if (__local) { \
if (__fd) { \
__local->fd = fd_ref(__fd); \
+ __local->inode = inode_ref(__fd->inode); \
} else { \
if (__loc) \
loc_copy(&__local->loc[0], __loc); \
if (__newloc) \
loc_copy(&__local->loc[1], __newloc); \
+ __local->inode = inode_ref(__local->loc[0].inode); \
} \
pl_get_xdata_requests(__local, xdata); \
} \
} \
} while (0)
+#define PL_CHECK_LOCK_ENFORCE_KEY(frame, dict, name, this, loc, fd, priv) \
+ do { \
+ if ((dict && (dict_get(dict, GF_ENFORCE_MANDATORY_LOCK))) || \
+ (name && (strcmp(name, GF_ENFORCE_MANDATORY_LOCK) == 0))) { \
+ inode_t *__inode = (loc ? loc->inode : fd->inode); \
+ pl_inode_t *__pl_inode = pl_inode_get(this, __inode, NULL); \
+ if (__pl_inode == NULL) { \
+ op_ret = -1; \
+ op_errno = ENOMEM; \
+ goto unwind; \
+ } \
+ if (!pl_is_mandatory_locking_enabled(__pl_inode) || \
+ !priv->mlock_enforced) { \
+ op_ret = -1; \
+ gf_msg(this->name, GF_LOG_DEBUG, EINVAL, 0, \
+ "option %s would need mandatory lock to be enabled " \
+ "and feature.enforce-mandatory-lock option to be set " \
+ "to on", \
+ GF_ENFORCE_MANDATORY_LOCK); \
+ op_errno = EINVAL; \
+ goto unwind; \
+ } \
+ \
+ op_ret = pl_local_init(frame, this, loc, fd); \
+ if (op_ret) { \
+ op_errno = ENOMEM; \
+ goto unwind; \
+ } \
+ \
+ ((pl_local_t *)(frame->local))->update_mlock_enforced_flag = 1; \
+ } \
+ } while (0)
+
+#define PL_INODE_REMOVE(_fop, _frame, _xl, _loc1, _loc2, _cont, _cbk, \
+ _args...) \
+ ({ \
+ struct list_head contend; \
+ pl_inode_t *__pl_inode; \
+ call_stub_t *__stub; \
+ int32_t __error; \
+ INIT_LIST_HEAD(&contend); \
+ __error = pl_inode_remove_prepare(_xl, _frame, _loc2 ? _loc2 : _loc1, \
+ &__pl_inode, &contend); \
+ if (__error < 0) { \
+ __stub = fop_##_fop##_stub(_frame, _cont, ##_args); \
+ __error = pl_inode_remove_complete(_xl, __pl_inode, __stub, \
+ &contend); \
+ } else if (__error == 0) { \
+ PL_LOCAL_GET_REQUESTS(_frame, _xl, xdata, ((fd_t *)NULL), _loc1, \
+ _loc2); \
+ STACK_WIND_COOKIE(_frame, _cbk, __pl_inode, FIRST_CHILD(_xl), \
+ FIRST_CHILD(_xl)->fops->_fop, ##_args); \
+ } \
+ __error; \
+ })
+
gf_boolean_t
pl_has_xdata_requests(dict_t *xdata)
{
- char *reqs[] = {GLUSTERFS_ENTRYLK_COUNT, GLUSTERFS_INODELK_COUNT,
- GLUSTERFS_INODELK_DOM_COUNT, GLUSTERFS_POSIXLK_COUNT,
- GLUSTERFS_PARENT_ENTRYLK, NULL};
+ static char *reqs[] = {GLUSTERFS_ENTRYLK_COUNT,
+ GLUSTERFS_INODELK_COUNT,
+ GLUSTERFS_INODELK_DOM_COUNT,
+ GLUSTERFS_POSIXLK_COUNT,
+ GLUSTERFS_PARENT_ENTRYLK,
+ GLUSTERFS_MULTIPLE_DOM_LK_CNT_REQUESTS,
+ NULL};
+ static int reqs_size[] = {SLEN(GLUSTERFS_ENTRYLK_COUNT),
+ SLEN(GLUSTERFS_INODELK_COUNT),
+ SLEN(GLUSTERFS_INODELK_DOM_COUNT),
+ SLEN(GLUSTERFS_POSIXLK_COUNT),
+ SLEN(GLUSTERFS_PARENT_ENTRYLK),
+ SLEN(GLUSTERFS_MULTIPLE_DOM_LK_CNT_REQUESTS),
+ 0};
int i = 0;
if (!xdata)
return _gf_false;
for (i = 0; reqs[i]; i++)
- if (dict_get(xdata, reqs[i]))
+ if (dict_getn(xdata, reqs[i], reqs_size[i]))
return _gf_true;
return _gf_false;
}
+static int
+dict_delete_domain_key(dict_t *dict, char *key, data_t *value, void *data)
+{
+ dict_del(dict, key);
+ return 0;
+}
+
void
pl_get_xdata_requests(pl_local_t *local, dict_t *xdata)
{
if (!local || !xdata)
return;
- if (dict_get(xdata, GLUSTERFS_ENTRYLK_COUNT)) {
+ GF_ASSERT(local->xdata == NULL);
+ local->xdata = dict_copy_with_ref(xdata, NULL);
+
+ if (dict_get_sizen(xdata, GLUSTERFS_ENTRYLK_COUNT)) {
local->entrylk_count_req = 1;
- dict_del(xdata, GLUSTERFS_ENTRYLK_COUNT);
+ dict_del_sizen(xdata, GLUSTERFS_ENTRYLK_COUNT);
}
- if (dict_get(xdata, GLUSTERFS_INODELK_COUNT)) {
+ if (dict_get_sizen(xdata, GLUSTERFS_INODELK_COUNT)) {
local->inodelk_count_req = 1;
- dict_del(xdata, GLUSTERFS_INODELK_COUNT);
+ dict_del_sizen(xdata, GLUSTERFS_INODELK_COUNT);
+ }
+ if (dict_get_sizen(xdata, GLUSTERFS_MULTIPLE_DOM_LK_CNT_REQUESTS)) {
+ local->multiple_dom_lk_requests = 1;
+ dict_del_sizen(xdata, GLUSTERFS_MULTIPLE_DOM_LK_CNT_REQUESTS);
+ dict_foreach_fnmatch(xdata, GLUSTERFS_INODELK_DOM_PREFIX "*",
+ dict_delete_domain_key, NULL);
}
- local->inodelk_dom_count_req = dict_get(xdata, GLUSTERFS_INODELK_DOM_COUNT);
+ local->inodelk_dom_count_req = dict_get_sizen(xdata,
+ GLUSTERFS_INODELK_DOM_COUNT);
if (local->inodelk_dom_count_req) {
data_ref(local->inodelk_dom_count_req);
- dict_del(xdata, GLUSTERFS_INODELK_DOM_COUNT);
+ dict_del_sizen(xdata, GLUSTERFS_INODELK_DOM_COUNT);
}
- if (dict_get(xdata, GLUSTERFS_POSIXLK_COUNT)) {
+ if (dict_get_sizen(xdata, GLUSTERFS_POSIXLK_COUNT)) {
local->posixlk_count_req = 1;
- dict_del(xdata, GLUSTERFS_POSIXLK_COUNT);
+ dict_del_sizen(xdata, GLUSTERFS_POSIXLK_COUNT);
}
- if (dict_get(xdata, GLUSTERFS_PARENT_ENTRYLK)) {
+ if (dict_get_sizen(xdata, GLUSTERFS_PARENT_ENTRYLK)) {
local->parent_entrylk_req = 1;
- dict_del(xdata, GLUSTERFS_PARENT_ENTRYLK);
+ dict_del_sizen(xdata, GLUSTERFS_PARENT_ENTRYLK);
}
}
@@ -187,20 +255,11 @@ pl_needs_xdata_response(pl_local_t *local)
if (!local)
return _gf_false;
- if (local->parent_entrylk_req)
- return _gf_true;
-
- if (local->entrylk_count_req)
- return _gf_true;
-
- if (local->inodelk_dom_count_req)
- return _gf_true;
-
- if (local->inodelk_count_req)
+ if (local->parent_entrylk_req || local->entrylk_count_req ||
+ local->inodelk_dom_count_req || local->inodelk_count_req ||
+ local->posixlk_count_req || local->multiple_dom_lk_requests)
return _gf_true;
- if (local->posixlk_count_req)
- return _gf_true;
return _gf_false;
}
@@ -221,8 +280,43 @@ pl_get_xdata_rsp_args(pl_local_t *local, char *fop, inode_t **parent,
}
}
-int32_t
-__get_posixlk_count(xlator_t *this, pl_inode_t *pl_inode)
+static inline int
+pl_track_io_fop_count(pl_local_t *local, xlator_t *this, pl_count_op_t op)
+{
+ pl_inode_t *pl_inode = NULL;
+
+ if (!local)
+ return -1;
+
+ pl_inode = pl_inode_get(this, local->inode, NULL);
+ if (!pl_inode)
+ return -1;
+
+ if (pl_inode->mlock_enforced && pl_inode->track_fop_wind_count) {
+ pthread_mutex_lock(&pl_inode->mutex);
+ {
+ if (op == DECREMENT) {
+ pl_inode->fop_wind_count--;
+ /* fop_wind_count can go negative when lock enforcement is
+ * enabled on unwind path of an IO. Hence the "<=" comparison.
+ */
+ if (pl_inode->fop_wind_count <= 0) {
+ pthread_cond_broadcast(&pl_inode->check_fop_wind_count);
+ pl_inode->track_fop_wind_count = _gf_false;
+ pl_inode->fop_wind_count = 0;
+ }
+ } else {
+ pl_inode->fop_wind_count++;
+ }
+ }
+ pthread_mutex_unlock(&pl_inode->mutex);
+ }
+
+ return 0;
+}
+
+static int32_t
+__get_posixlk_count(pl_inode_t *pl_inode)
{
posix_lock_t *lock = NULL;
int32_t count = 0;
@@ -237,10 +331,9 @@ get_posixlk_count(xlator_t *this, inode_t *inode)
{
pl_inode_t *pl_inode = NULL;
uint64_t tmp_pl_inode = 0;
- int ret = 0;
int32_t count = 0;
- ret = inode_ctx_get(inode, this, &tmp_pl_inode);
+ int ret = inode_ctx_get(inode, this, &tmp_pl_inode);
if (ret != 0) {
goto out;
}
@@ -249,7 +342,7 @@ get_posixlk_count(xlator_t *this, inode_t *inode)
pthread_mutex_lock(&pl_inode->mutex);
{
- count = __get_posixlk_count(this, pl_inode);
+ count = __get_posixlk_count(pl_inode);
}
pthread_mutex_unlock(&pl_inode->mutex);
@@ -265,10 +358,10 @@ pl_parent_entrylk_xattr_fill(xlator_t *this, inode_t *parent, char *basename,
int32_t maxcount = -1;
int ret = -1;
- if (!parent || !basename || !strlen(basename))
+ if (!parent || !basename)
goto out;
if (keep_max) {
- ret = dict_get_int32(dict, GLUSTERFS_PARENT_ENTRYLK, &maxcount);
+ ret = dict_get_int32_sizen(dict, GLUSTERFS_PARENT_ENTRYLK, &maxcount);
if (ret < 0)
gf_msg_debug(this->name, 0, " Failed to fetch the value for key %s",
GLUSTERFS_PARENT_ENTRYLK);
@@ -277,7 +370,7 @@ pl_parent_entrylk_xattr_fill(xlator_t *this, inode_t *parent, char *basename,
if (maxcount >= entrylk)
return;
out:
- ret = dict_set_int32(dict, GLUSTERFS_PARENT_ENTRYLK, entrylk);
+ ret = dict_set_int32_sizen(dict, GLUSTERFS_PARENT_ENTRYLK, entrylk);
if (ret < 0) {
gf_msg_debug(this->name, 0, " dict_set failed on key %s",
GLUSTERFS_PARENT_ENTRYLK);
@@ -293,7 +386,7 @@ pl_entrylk_xattr_fill(xlator_t *this, inode_t *inode, dict_t *dict,
int ret = -1;
if (keep_max) {
- ret = dict_get_int32(dict, GLUSTERFS_ENTRYLK_COUNT, &maxcount);
+ ret = dict_get_int32_sizen(dict, GLUSTERFS_ENTRYLK_COUNT, &maxcount);
if (ret < 0)
gf_msg_debug(this->name, 0, " Failed to fetch the value for key %s",
GLUSTERFS_ENTRYLK_COUNT);
@@ -302,7 +395,7 @@ pl_entrylk_xattr_fill(xlator_t *this, inode_t *inode, dict_t *dict,
if (maxcount >= count)
return;
- ret = dict_set_int32(dict, GLUSTERFS_ENTRYLK_COUNT, count);
+ ret = dict_set_int32_sizen(dict, GLUSTERFS_ENTRYLK_COUNT, count);
if (ret < 0) {
gf_msg_debug(this->name, 0, " dict_set failed on key %s",
GLUSTERFS_ENTRYLK_COUNT);
@@ -318,7 +411,7 @@ pl_inodelk_xattr_fill(xlator_t *this, inode_t *inode, dict_t *dict,
int ret = -1;
if (keep_max) {
- ret = dict_get_int32(dict, GLUSTERFS_INODELK_COUNT, &maxcount);
+ ret = dict_get_int32_sizen(dict, GLUSTERFS_INODELK_COUNT, &maxcount);
if (ret < 0)
gf_msg_debug(this->name, 0, " Failed to fetch the value for key %s",
GLUSTERFS_INODELK_COUNT);
@@ -327,7 +420,7 @@ pl_inodelk_xattr_fill(xlator_t *this, inode_t *inode, dict_t *dict,
if (maxcount >= count)
return;
- ret = dict_set_int32(dict, GLUSTERFS_INODELK_COUNT, count);
+ ret = dict_set_int32_sizen(dict, GLUSTERFS_INODELK_COUNT, count);
if (ret < 0) {
gf_msg_debug(this->name, 0,
"Failed to set count for "
@@ -347,7 +440,7 @@ pl_posixlk_xattr_fill(xlator_t *this, inode_t *inode, dict_t *dict,
int ret = -1;
if (keep_max) {
- ret = dict_get_int32(dict, GLUSTERFS_POSIXLK_COUNT, &maxcount);
+ ret = dict_get_int32_sizen(dict, GLUSTERFS_POSIXLK_COUNT, &maxcount);
if (ret < 0)
gf_msg_debug(this->name, 0, " Failed to fetch the value for key %s",
GLUSTERFS_POSIXLK_COUNT);
@@ -356,7 +449,7 @@ pl_posixlk_xattr_fill(xlator_t *this, inode_t *inode, dict_t *dict,
if (maxcount >= count)
return;
- ret = dict_set_int32(dict, GLUSTERFS_POSIXLK_COUNT, count);
+ ret = dict_set_int32_sizen(dict, GLUSTERFS_POSIXLK_COUNT, count);
if (ret < 0) {
gf_msg_debug(this->name, 0, " dict_set failed on key %s",
GLUSTERFS_POSIXLK_COUNT);
@@ -364,6 +457,80 @@ pl_posixlk_xattr_fill(xlator_t *this, inode_t *inode, dict_t *dict,
}
+/*
+ * Report the inodelk count of a single lock domain in a response dictionary.
+ *
+ * this     - xlator whose name tags the debug messages; also forwarded to
+ *            get_inodelk_count()
+ * inode    - inode whose inodelks are counted
+ * dict     - response dictionary that is read (when keep_max is set) and
+ *            updated
+ * domname  - lock domain forwarded to get_inodelk_count()
+ * keep_max - when set, a value already stored under key is kept if it is
+ *            greater than or equal to the freshly computed count
+ * key      - dictionary key the count is read from and stored under
+ *
+ * Lookup and store failures are only logged at debug level; nothing is
+ * returned to the caller.
+ */
 void
+pl_inodelk_xattr_fill_each(xlator_t *this, inode_t *inode, dict_t *dict,
+                           char *domname, gf_boolean_t keep_max, char *key)
+{
+    int32_t count = 0;
+    int32_t maxcount = -1;
+    int ret = -1;
+
+    if (keep_max) {
+        ret = dict_get_int32(dict, key, &maxcount);
+        if (ret < 0) {
+            /* Name the key that was actually looked up rather than the
+             * generic GLUSTERFS_INODELK_COUNT key. */
+            gf_msg_debug(this->name, 0, " Failed to fetch the value for key %s",
+                         key);
+        }
+    }
+
+    count = get_inodelk_count(this, inode, domname);
+    /* maxcount is still -1 here when no previous value was read. */
+    if (maxcount >= count)
+        return;
+
+    ret = dict_set_int32(dict, key, count);
+    if (ret < 0) {
+        gf_msg_debug(this->name, 0,
+                     "Failed to set count for "
+                     "key %s",
+                     key);
+    }
+}
+
+/*
+ * Per-key callback passed to dict_foreach_fnmatch() by
+ * pl_fill_multiple_dom_lk_requests().
+ *
+ * The lock domain name is whatever follows the first ':'-terminated token of
+ * key; the inodelk count for that domain is then stored in d->xdata_rsp
+ * under the unmodified key via pl_inodelk_xattr_fill_each().
+ *
+ * this  - dictionary being iterated (not used here)
+ * key   - matched request key
+ * value - value stored under key (not used here)
+ * data  - multi_dom_lk_data describing the xlator, inode, response dict and
+ *         keep_max policy
+ *
+ * Returns 0 on success, -1 when the key cannot be duplicated or carries no
+ * domain name after the ':' separator.
+ */
+static int
+pl_inodelk_xattr_fill_multiple(dict_t *this, char *key, data_t *value,
+                               void *data)
+{
+    multi_dom_lk_data *d = data;
+    char *tmp_key = NULL;
+    char *save_ptr = NULL;
+    int ret = -1;
+
+    /* strtok_r() writes into its input, so tokenize a private copy. */
+    tmp_key = gf_strdup(key);
+    if (!tmp_key)
+        return -1;
+
+    strtok_r(tmp_key, ":", &save_ptr);
+    /* Depending on the libc, save_ptr is either left pointing at the
+     * terminating '\0' or set to NULL when nothing follows the first token,
+     * so both cases must be rejected before it is dereferenced. */
+    if (!save_ptr || !*save_ptr) {
+        gf_msg(THIS->name, GF_LOG_ERROR, 0, EINVAL,
+               "Could not tokenize domain string from key %s", key);
+        goto out;
+    }
+
+    /* save_ptr points into tmp_key, which stays allocated until 'out'. */
+    pl_inodelk_xattr_fill_each(d->this, d->inode, d->xdata_rsp, save_ptr,
+                               d->keep_max, key);
+    ret = 0;
+
+out:
+    GF_FREE(tmp_key);
+    return ret;
+}
+
+/*
+ * Answer every per-domain inodelk count request found in local->xdata.
+ *
+ * Each key of local->xdata matching GLUSTERFS_INODELK_DOM_PREFIX "*" is
+ * handed to pl_inodelk_xattr_fill_multiple(), which stores the count for
+ * that key in dict.
+ *
+ * this     - locks xlator
+ * local    - frame-local state holding the request dictionary (xdata)
+ * inode    - inode whose inodelks are counted
+ * dict     - response dictionary receiving the counts
+ * keep_max - forwarded to the per-key callback
+ */
+void
+pl_fill_multiple_dom_lk_requests(xlator_t *this, pl_local_t *local,
+                                 inode_t *inode, dict_t *dict,
+                                 gf_boolean_t keep_max)
+{
+    multi_dom_lk_data ctx = {
+        .this = this,
+        .inode = inode,
+        .xdata_rsp = dict,
+        .keep_max = keep_max,
+    };
+
+    dict_foreach_fnmatch(local->xdata, GLUSTERFS_INODELK_DOM_PREFIX "*",
+                         pl_inodelk_xattr_fill_multiple, &ctx);
+}
+
+void
pl_set_xdata_response(xlator_t *this, pl_local_t *local, inode_t *parent,
inode_t *inode, char *name, dict_t *xdata,
gf_boolean_t max_lock)
@@ -371,41 +538,28 @@ pl_set_xdata_response(xlator_t *this, pl_local_t *local, inode_t *parent,
if (!xdata || !local)
return;
- if (local->parent_entrylk_req && parent && name && strlen(name))
+ if (local->parent_entrylk_req && parent && name && name[0] != '\0')
pl_parent_entrylk_xattr_fill(this, parent, name, xdata, max_lock);
- if (local->entrylk_count_req && inode)
+ if (!inode)
+ return;
+
+ if (local->entrylk_count_req)
pl_entrylk_xattr_fill(this, inode, xdata, max_lock);
- if (local->inodelk_dom_count_req && inode)
+ if (local->inodelk_dom_count_req)
pl_inodelk_xattr_fill(this, inode, xdata,
data_to_str(local->inodelk_dom_count_req),
max_lock);
- if (local->inodelk_count_req && inode)
+ if (local->inodelk_count_req)
pl_inodelk_xattr_fill(this, inode, xdata, NULL, max_lock);
- if (local->posixlk_count_req && inode)
+ if (local->posixlk_count_req)
pl_posixlk_xattr_fill(this, inode, xdata, max_lock);
-}
-
-/* Return true in case we need to ensure mandatory-locking
- * semnatics under different modes.
- */
-gf_boolean_t
-pl_is_mandatory_locking_enabled(pl_inode_t *pl_inode)
-{
- posix_locks_private_t *priv = NULL;
-
- priv = THIS->private;
-
- if (priv->mandatory_mode == MLK_FILE_BASED && pl_inode->mandatory)
- return _gf_true;
- else if (priv->mandatory_mode == MLK_FORCED ||
- priv->mandatory_mode == MLK_OPTIMAL)
- return _gf_true;
- return _gf_false;
+ if (local->multiple_dom_lk_requests)
+ pl_fill_multiple_dom_lk_requests(this, local, inode, xdata, max_lock);
}
/* Checks whether the region where fop is acting upon conflicts
@@ -420,15 +574,19 @@ pl_is_fop_allowed(pl_inode_t *pl_inode, posix_lock_t *region, fd_t *fd,
int ret = 0;
if (!__rw_allowable(pl_inode, region, op)) {
- if ((!fd) || (fd && (fd->flags & O_NONBLOCK))) {
+ if (pl_inode->mlock_enforced) {
+ *can_block = _gf_false;
+ } else if ((!fd) || (fd && (fd->flags & O_NONBLOCK))) {
gf_log("locks", GF_LOG_TRACE,
"returning EAGAIN"
" because fd is O_NONBLOCK");
*can_block = _gf_false;
- } else
+ } else {
*can_block = _gf_true;
- } else
+ }
+ } else {
ret = 1;
+ }
return ret;
}
@@ -436,9 +594,7 @@ pl_is_fop_allowed(pl_inode_t *pl_inode, posix_lock_t *region, fd_t *fd,
static pl_fdctx_t *
pl_new_fdctx()
{
- pl_fdctx_t *fdctx = NULL;
-
- fdctx = GF_CALLOC(1, sizeof(*fdctx), gf_locks_mt_pl_fdctx_t);
+ pl_fdctx_t *fdctx = GF_MALLOC(sizeof(*fdctx), gf_locks_mt_pl_fdctx_t);
GF_VALIDATE_OR_GOTO("posix-locks", fdctx, out);
INIT_LIST_HEAD(&fdctx->locks_list);
@@ -471,7 +627,9 @@ pl_check_n_create_fdctx(xlator_t *this, fd_t *fd)
if (ret != 0) {
GF_FREE(fdctx);
fdctx = NULL;
+ UNLOCK(&fd->lock);
gf_log(this->name, GF_LOG_DEBUG, "failed to set fd ctx");
+ goto out;
}
}
unlock:
@@ -486,8 +644,10 @@ pl_discard_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
struct iatt *postbuf, dict_t *xdata)
{
- STACK_UNWIND_STRICT(discard, frame, op_ret, op_errno, prebuf, postbuf,
- xdata);
+ pl_track_io_fop_count(frame->local, this, DECREMENT);
+
+ PL_STACK_UNWIND(discard, xdata, frame, op_ret, op_errno, prebuf, postbuf,
+ xdata);
return 0;
}
@@ -495,6 +655,8 @@ int
pl_discard_cont(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
size_t len, dict_t *xdata)
{
+ pl_track_io_fop_count(frame->local, this, INCREMENT);
+
STACK_WIND(frame, pl_discard_cbk, FIRST_CHILD(this),
FIRST_CHILD(this)->fops->discard, fd, offset, len, xdata);
return 0;
@@ -504,6 +666,7 @@ int32_t
pl_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
size_t len, dict_t *xdata)
{
+ pl_local_t *local = NULL;
pl_inode_t *pl_inode = NULL;
pl_rw_req_t *rw = NULL;
posix_lock_t region = {
@@ -520,17 +683,28 @@ pl_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
GF_VALIDATE_OR_GOTO("locks", this, unwind);
- pl_inode = pl_inode_get(this, fd->inode);
- if (!pl_inode) {
+ local = mem_get0(this->local_pool);
+ if (!local) {
op_ret = -1;
op_errno = ENOMEM;
goto unwind;
}
- enabled = pl_is_mandatory_locking_enabled(pl_inode);
+ frame->local = local;
+ local->inode = inode_ref(fd->inode);
+ local->fd = fd_ref(fd);
+
+ pl_inode = pl_inode_get(this, fd->inode, local);
+ if (!pl_inode) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto unwind;
+ }
if (frame->root->pid < 0)
enabled = _gf_false;
+ else
+ enabled = pl_is_mandatory_locking_enabled(pl_inode);
if (enabled) {
region.fl_start = offset;
@@ -544,15 +718,19 @@ pl_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
{
allowed = pl_is_fop_allowed(pl_inode, &region, fd, GF_FOP_DISCARD,
&can_block);
- if (allowed == 1)
+ if (allowed == 1) {
+ if (pl_inode->mlock_enforced &&
+ pl_inode->track_fop_wind_count) {
+ pl_inode->fop_wind_count++;
+ }
goto unlock;
- else if (!can_block) {
+ } else if (!can_block) {
op_errno = EAGAIN;
op_ret = -1;
goto unlock;
}
- rw = GF_CALLOC(1, sizeof(*rw), gf_locks_mt_pl_rw_req_t);
+ rw = GF_MALLOC(sizeof(*rw), gf_locks_mt_pl_rw_req_t);
if (!rw) {
op_errno = ENOMEM;
op_ret = -1;
@@ -581,7 +759,8 @@ pl_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
FIRST_CHILD(this)->fops->discard, fd, offset, len, xdata);
unwind:
if (op_ret == -1)
- STACK_UNWIND_STRICT(discard, frame, op_ret, op_errno, NULL, NULL, NULL);
+ PL_STACK_UNWIND(discard, xdata, frame, op_ret, op_errno, NULL, NULL,
+ NULL);
return 0;
}
@@ -591,8 +770,10 @@ pl_zerofill_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
struct iatt *postbuf, dict_t *xdata)
{
- STACK_UNWIND_STRICT(zerofill, frame, op_ret, op_errno, prebuf, postbuf,
- xdata);
+ pl_track_io_fop_count(frame->local, this, DECREMENT);
+
+ PL_STACK_UNWIND(zerofill, xdata, frame, op_ret, op_errno, prebuf, postbuf,
+ xdata);
return 0;
}
@@ -600,6 +781,8 @@ int
pl_zerofill_cont(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
off_t len, dict_t *xdata)
{
+ pl_track_io_fop_count(frame->local, this, INCREMENT);
+
STACK_WIND(frame, pl_zerofill_cbk, FIRST_CHILD(this),
FIRST_CHILD(this)->fops->zerofill, fd, offset, len, xdata);
return 0;
@@ -609,6 +792,7 @@ int32_t
pl_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
off_t len, dict_t *xdata)
{
+ pl_local_t *local = NULL;
pl_inode_t *pl_inode = NULL;
pl_rw_req_t *rw = NULL;
posix_lock_t region = {
@@ -625,17 +809,28 @@ pl_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
GF_VALIDATE_OR_GOTO("locks", this, unwind);
- pl_inode = pl_inode_get(this, fd->inode);
- if (!pl_inode) {
+ local = mem_get0(this->local_pool);
+ if (!local) {
op_ret = -1;
op_errno = ENOMEM;
goto unwind;
}
- enabled = pl_is_mandatory_locking_enabled(pl_inode);
+ frame->local = local;
+ local->inode = inode_ref(fd->inode);
+ local->fd = fd_ref(fd);
+
+ pl_inode = pl_inode_get(this, fd->inode, local);
+ if (!pl_inode) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto unwind;
+ }
if (frame->root->pid < 0)
enabled = _gf_false;
+ else
+ enabled = pl_is_mandatory_locking_enabled(pl_inode);
if (enabled) {
region.fl_start = offset;
@@ -649,15 +844,19 @@ pl_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
{
allowed = pl_is_fop_allowed(pl_inode, &region, fd, GF_FOP_ZEROFILL,
&can_block);
- if (allowed == 1)
+ if (allowed == 1) {
+ if (pl_inode->mlock_enforced &&
+ pl_inode->track_fop_wind_count) {
+ pl_inode->fop_wind_count++;
+ }
goto unlock;
- else if (!can_block) {
+ } else if (!can_block) {
op_errno = EAGAIN;
op_ret = -1;
goto unlock;
}
- rw = GF_CALLOC(1, sizeof(*rw), gf_locks_mt_pl_rw_req_t);
+ rw = GF_MALLOC(sizeof(*rw), gf_locks_mt_pl_rw_req_t);
if (!rw) {
op_errno = ENOMEM;
op_ret = -1;
@@ -686,8 +885,8 @@ pl_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
FIRST_CHILD(this)->fops->zerofill, fd, offset, len, xdata);
unwind:
if (op_ret == -1)
- STACK_UNWIND_STRICT(zerofill, frame, op_ret, op_errno, NULL, NULL,
- NULL);
+ PL_STACK_UNWIND(zerofill, xdata, frame, op_ret, op_errno, NULL, NULL,
+ NULL);
return 0;
}
@@ -697,24 +896,16 @@ pl_truncate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
struct iatt *postbuf, dict_t *xdata)
{
- pl_local_t *local = NULL;
-
- local = frame->local;
+ pl_local_t *local = frame->local;
- if (local->op == GF_FOP_TRUNCATE)
- loc_wipe(&local->loc[0]);
-
- if (local->xdata)
- dict_unref(local->xdata);
- if (local->fd)
- fd_unref(local->fd);
+ pl_track_io_fop_count(local, this, DECREMENT);
if (local->op == GF_FOP_TRUNCATE)
- STACK_UNWIND_STRICT(truncate, frame, op_ret, op_errno, prebuf, postbuf,
- xdata);
+ PL_STACK_UNWIND(truncate, xdata, frame, op_ret, op_errno, prebuf,
+ postbuf, xdata);
else
- STACK_UNWIND_STRICT(ftruncate, frame, op_ret, op_errno, prebuf, postbuf,
- xdata);
+ PL_STACK_UNWIND(ftruncate, xdata, frame, op_ret, op_errno, prebuf,
+ postbuf, xdata);
return 0;
}
@@ -722,6 +913,8 @@ int
pl_ftruncate_cont(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
dict_t *xdata)
{
+ pl_track_io_fop_count(frame->local, this, INCREMENT);
+
STACK_WIND(frame, pl_truncate_cbk, FIRST_CHILD(this),
FIRST_CHILD(this)->fops->ftruncate, fd, offset, xdata);
return 0;
@@ -731,6 +924,8 @@ int
pl_truncate_cont(call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset,
dict_t *xdata)
{
+ pl_track_io_fop_count(frame->local, this, INCREMENT);
+
STACK_WIND(frame, pl_truncate_cbk, FIRST_CHILD(this),
FIRST_CHILD(this)->fops->truncate, loc, offset, xdata);
return 0;
@@ -741,7 +936,7 @@ truncate_stat_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *buf,
dict_t *xdata)
{
- pl_local_t *local = NULL;
+ pl_local_t *local = frame->local;
inode_t *inode = NULL;
pl_inode_t *pl_inode = NULL;
pl_rw_req_t *rw = NULL;
@@ -755,7 +950,6 @@ truncate_stat_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
gf_boolean_t can_block = _gf_true;
int allowed = 1;
- local = frame->local;
GF_VALIDATE_OR_GOTO("locks", this, unwind);
if (op_ret != 0) {
@@ -770,17 +964,19 @@ truncate_stat_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
else
inode = local->fd->inode;
- pl_inode = pl_inode_get(this, inode);
+ local->inode = inode_ref(inode);
+
+ pl_inode = pl_inode_get(this, inode, local);
if (!pl_inode) {
op_ret = -1;
op_errno = ENOMEM;
goto unwind;
}
- enabled = pl_is_mandatory_locking_enabled(pl_inode);
-
if (frame->root->pid < 0)
enabled = _gf_false;
+ else
+ enabled = pl_is_mandatory_locking_enabled(pl_inode);
if (enabled) {
region.fl_start = local->offset;
@@ -794,15 +990,19 @@ truncate_stat_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
allowed = pl_is_fop_allowed(pl_inode, &region, local->fd, local->op,
&can_block);
- if (allowed == 1)
+ if (allowed == 1) {
+ if (pl_inode->mlock_enforced &&
+ pl_inode->track_fop_wind_count) {
+ pl_inode->fop_wind_count++;
+ }
goto unlock;
- else if (!can_block) {
+ } else if (!can_block) {
op_errno = EAGAIN;
op_ret = -1;
goto unlock;
}
- rw = GF_CALLOC(1, sizeof(*rw), gf_locks_mt_pl_rw_req_t);
+ rw = GF_MALLOC(sizeof(*rw), gf_locks_mt_pl_rw_req_t);
if (!rw) {
op_errno = ENOMEM;
op_ret = -1;
@@ -855,21 +1055,14 @@ unwind:
"ret: %d, error: %s",
op_ret, strerror(op_errno));
- if (local->op == GF_FOP_TRUNCATE)
- loc_wipe(&local->loc[0]);
- if (local->xdata)
- dict_unref(local->xdata);
- if (local->fd)
- fd_unref(local->fd);
-
switch (local->op) {
case GF_FOP_TRUNCATE:
- STACK_UNWIND_STRICT(truncate, frame, op_ret, op_errno, buf,
- NULL, xdata);
+ PL_STACK_UNWIND(truncate, xdata, frame, op_ret, op_errno, buf,
+ NULL, xdata);
break;
case GF_FOP_FTRUNCATE:
- STACK_UNWIND_STRICT(ftruncate, frame, op_ret, op_errno, buf,
- NULL, xdata);
+ PL_STACK_UNWIND(ftruncate, xdata, frame, op_ret, op_errno, buf,
+ NULL, xdata);
break;
default:
break;
@@ -901,6 +1094,7 @@ pl_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset,
STACK_WIND(frame, truncate_stat_cbk, FIRST_CHILD(this),
FIRST_CHILD(this)->fops->stat, loc, NULL);
ret = 0;
+
unwind:
if (ret == -1) {
gf_log(this ? this->name : "locks", GF_LOG_ERROR,
@@ -1041,68 +1235,68 @@ pl_getxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
return 0;
}
-int32_t
-pl_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, const char *name,
- dict_t *xdata)
+static int32_t
+pl_getxattr_clrlk(xlator_t *this, const char *name, inode_t *inode,
+ dict_t **dict, int32_t *op_errno)
{
- int32_t op_errno = EINVAL;
- int op_ret = -1;
int32_t bcount = 0;
int32_t gcount = 0;
- char key[PATH_MAX] = {
- 0,
- };
+ char *key = NULL;
char *lk_summary = NULL;
pl_inode_t *pl_inode = NULL;
- dict_t *dict = NULL;
clrlk_args args = {
0,
};
char *brickname = NULL;
+ int32_t op_ret = -1;
- if (!name)
- goto usual;
-
- if (strncmp(name, GF_XATTR_CLRLK_CMD, SLEN(GF_XATTR_CLRLK_CMD)))
- goto usual;
+ *op_errno = EINVAL;
if (clrlk_parse_args(name, &args)) {
- op_errno = EINVAL;
+ *op_errno = EINVAL;
goto out;
}
- dict = dict_new();
- if (!dict) {
- op_errno = ENOMEM;
+ *dict = dict_new();
+ if (!*dict) {
+ *op_errno = ENOMEM;
goto out;
}
- pl_inode = pl_inode_get(this, loc->inode);
+ pl_inode = pl_inode_get(this, inode, NULL);
if (!pl_inode) {
- op_errno = ENOMEM;
+ *op_errno = ENOMEM;
goto out;
}
switch (args.type) {
case CLRLK_INODE:
case CLRLK_ENTRY:
- op_ret = clrlk_clear_lks_in_all_domains(
- this, pl_inode, &args, &bcount, &gcount, &op_errno);
- if (op_ret)
- goto out;
+ op_ret = clrlk_clear_lks_in_all_domains(this, pl_inode, &args,
+ &bcount, &gcount, op_errno);
break;
case CLRLK_POSIX:
op_ret = clrlk_clear_posixlk(this, pl_inode, &args, &bcount,
- &gcount, &op_errno);
- if (op_ret)
- goto out;
+ &gcount, op_errno);
break;
- case CLRLK_TYPE_MAX:
- op_errno = EINVAL;
- goto out;
+ default:
+ op_ret = -1;
+ *op_errno = EINVAL;
}
+ if (op_ret) {
+ if (args.type >= CLRLK_TYPE_MAX) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "clear locks: invalid lock type %d", args.type);
+ } else {
+ gf_log(this->name, GF_LOG_ERROR,
+ "clear locks of type %s failed: %s",
+ clrlk_type_names[args.type], strerror(*op_errno));
+ }
- op_ret = fetch_pathinfo(this, loc->inode, &op_errno, &brickname);
+ goto out;
+ }
+
+ op_ret = fetch_pathinfo(this, inode, op_errno, &brickname);
if (op_ret) {
gf_log(this->name, GF_LOG_WARNING, "Couldn't get brickname");
} else {
@@ -1117,43 +1311,62 @@ pl_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, const char *name,
if (!gcount && !bcount) {
if (gf_asprintf(&lk_summary, "No locks cleared.") == -1) {
op_ret = -1;
- op_errno = ENOMEM;
+ *op_errno = ENOMEM;
goto out;
}
- } else if (gf_asprintf(
- &lk_summary,
- "%s: %s blocked locks=%d "
- "granted locks=%d",
- (brickname == NULL) ? this->name : brickname,
- (args.type == CLRLK_INODE)
- ? "inode"
- : (args.type == CLRLK_ENTRY)
- ? "entry"
- : (args.type == CLRLK_POSIX) ? "posix" : " ",
- bcount, gcount) == -1) {
+ } else if (gf_asprintf(&lk_summary,
+ "%s: %s blocked locks=%d "
+ "granted locks=%d",
+ (brickname == NULL) ? this->name : brickname,
+ clrlk_type_names[args.type], bcount, gcount) == -1) {
op_ret = -1;
- op_errno = ENOMEM;
+ *op_errno = ENOMEM;
goto out;
}
+ gf_log(this->name, GF_LOG_DEBUG, "%s", lk_summary);
- if (snprintf(key, sizeof(key), "%s", name) >= sizeof(key)) {
+ key = gf_strdup(name);
+ if (!key) {
op_ret = -1;
goto out;
}
- if (dict_set_dynstr(dict, key, lk_summary)) {
+ if (dict_set_dynstr(*dict, key, lk_summary)) {
op_ret = -1;
- op_errno = ENOMEM;
+ *op_errno = ENOMEM;
goto out;
}
op_ret = 0;
+
out:
GF_FREE(brickname);
- STACK_UNWIND_STRICT(getxattr, frame, op_ret, op_errno, dict, xdata);
-
GF_FREE(args.opts);
- if (op_ret && lk_summary)
+ GF_FREE(key);
+ if (op_ret) {
GF_FREE(lk_summary);
+ }
+
+ return op_ret;
+}
+
+int32_t
+pl_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, const char *name,
+ dict_t *xdata)
+{
+ int32_t op_errno = EINVAL;
+ int32_t op_ret = -1;
+ dict_t *dict = NULL;
+
+ if (!name)
+ goto usual;
+
+ if (strncmp(name, GF_XATTR_CLRLK_CMD, SLEN(GF_XATTR_CLRLK_CMD)))
+ goto usual;
+
+ op_ret = pl_getxattr_clrlk(this, name, loc->inode, &dict, &op_errno);
+
+ STACK_UNWIND_STRICT(getxattr, frame, op_ret, op_errno, dict, xdata);
+
if (dict)
dict_unref(dict);
return 0;
@@ -1219,7 +1432,7 @@ fetch_pathinfo(xlator_t *this, inode_t *inode, int32_t *op_errno,
goto out;
}
- ret = dict_get_str(dict, GF_XATTR_PATHINFO_KEY, brickname);
+ ret = dict_get_str_sizen(dict, GF_XATTR_PATHINFO_KEY, brickname);
if (ret)
goto out;
@@ -1242,15 +1455,12 @@ out:
int
pl_lockinfo_get_brickname(xlator_t *this, inode_t *inode, int32_t *op_errno)
{
- int ret = -1;
- posix_locks_private_t *priv = NULL;
+ posix_locks_private_t *priv = this->private;
char *brickname = NULL;
char *end = NULL;
char *tmp = NULL;
- priv = this->private;
-
- ret = fetch_pathinfo(this, inode, op_errno, &brickname);
+ int ret = fetch_pathinfo(this, inode, op_errno, &brickname);
if (ret)
goto out;
@@ -1278,12 +1488,10 @@ out:
char *
pl_lockinfo_key(xlator_t *this, inode_t *inode, int32_t *op_errno)
{
- posix_locks_private_t *priv = NULL;
+ posix_locks_private_t *priv = this->private;
char *key = NULL;
int ret = 0;
- priv = this->private;
-
if (priv->brickname == NULL) {
ret = pl_lockinfo_get_brickname(this, inode, op_errno);
if (ret < 0) {
@@ -1301,14 +1509,13 @@ int32_t
pl_fgetxattr_handle_lockinfo(xlator_t *this, fd_t *fd, dict_t *dict,
int32_t *op_errno)
{
- pl_inode_t *pl_inode = NULL;
char *key = NULL, *buf = NULL;
int32_t op_ret = 0;
unsigned long fdnum = 0;
int32_t len = 0;
dict_t *tmp = NULL;
- pl_inode = pl_inode_get(this, fd->inode);
+ pl_inode_t *pl_inode = pl_inode_get(this, fd->inode, NULL);
if (!pl_inode) {
gf_log(this->name, GF_LOG_DEBUG, "Could not get inode.");
@@ -1348,8 +1555,9 @@ pl_fgetxattr_handle_lockinfo(xlator_t *this, fd_t *fd, dict_t *dict,
goto out;
}
- len = dict_serialized_length(tmp);
- if (len < 0) {
+ op_ret = dict_allocate_and_serialize(tmp, (char **)&buf,
+ (unsigned int *)&len);
+ if (op_ret != 0) {
*op_errno = -op_ret;
op_ret = -1;
gf_log(this->name, GF_LOG_WARNING,
@@ -1359,24 +1567,6 @@ pl_fgetxattr_handle_lockinfo(xlator_t *this, fd_t *fd, dict_t *dict,
goto out;
}
- buf = GF_CALLOC(1, len, gf_common_mt_char);
- if (buf == NULL) {
- op_ret = -1;
- *op_errno = ENOMEM;
- goto out;
- }
-
- op_ret = dict_serialize(tmp, buf);
- if (op_ret < 0) {
- *op_errno = -op_ret;
- op_ret = -1;
- gf_log(this->name, GF_LOG_WARNING,
- "dict_serialize failed (%s) while handling lockinfo "
- "for fd (ptr: %p inode-gfid:%s)",
- strerror(*op_errno), fd, uuid_utoa(fd->inode->gfid));
- goto out;
- }
-
op_ret = dict_set_dynptr(dict, GF_XATTR_LOCKINFO_KEY, buf, len);
if (op_ret < 0) {
*op_errno = -op_ret;
@@ -1429,6 +1619,11 @@ pl_fgetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, const char *name,
}
goto unwind;
+ } else if (strncmp(name, GF_XATTR_CLRLK_CMD, SLEN(GF_XATTR_CLRLK_CMD)) ==
+ 0) {
+ op_ret = pl_getxattr_clrlk(this, name, fd->inode, &dict, &op_errno);
+
+ goto unwind;
} else {
goto usual;
}
@@ -1451,14 +1646,11 @@ int32_t
pl_migrate_locks(call_frame_t *frame, fd_t *newfd, uint64_t oldfd_num,
int32_t *op_errno)
{
- pl_inode_t *pl_inode = NULL;
- uint64_t newfd_num = 0;
posix_lock_t *l = NULL;
int32_t op_ret = 0;
+ uint64_t newfd_num = fd_to_fdnum(newfd);
- newfd_num = fd_to_fdnum(newfd);
-
- pl_inode = pl_inode_get(frame->this, newfd->inode);
+ pl_inode_t *pl_inode = pl_inode_get(frame->this, newfd->inode, NULL);
if (pl_inode == NULL) {
op_ret = -1;
*op_errno = EBADFD;
@@ -1487,11 +1679,10 @@ pl_fsetxattr_handle_lockinfo(call_frame_t *frame, fd_t *fd, char *lockinfo_buf,
int len, int32_t *op_errno)
{
int32_t op_ret = -1;
- dict_t *lockinfo = NULL;
uint64_t oldfd_num = 0;
char *key = NULL;
- lockinfo = dict_new();
+ dict_t *lockinfo = dict_new();
if (lockinfo == NULL) {
op_ret = -1;
*op_errno = ENOMEM;
@@ -1537,6 +1728,27 @@ int32_t
pl_fsetxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
+ pl_local_t *local = NULL;
+ pl_inode_t *pl_inode = NULL;
+
+ local = frame->local;
+ if (local && local->update_mlock_enforced_flag && op_ret != -1) {
+ pl_inode = pl_inode_get(this, local->inode, NULL);
+ if (!pl_inode) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto unwind;
+ }
+
+ pthread_mutex_lock(&pl_inode->mutex);
+ {
+ pl_inode->mlock_enforced = _gf_true;
+ pl_inode->check_mlock_info = _gf_false;
+ }
+ pthread_mutex_unlock(&pl_inode->mutex);
+ }
+
+unwind:
PL_STACK_UNWIND_FOR_CLIENT(fsetxattr, xdata, frame, op_ret, op_errno,
xdata);
return 0;
@@ -1546,12 +1758,14 @@ int32_t
pl_fsetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict,
int32_t flags, dict_t *xdata)
{
- int32_t op_ret = 0, op_errno = 0;
+ int32_t op_errno = 0;
void *lockinfo_buf = NULL;
int len = 0;
+ char *name = NULL;
+ posix_locks_private_t *priv = this->private;
- op_ret = dict_get_ptr_and_len(dict, GF_XATTR_LOCKINFO_KEY, &lockinfo_buf,
- &len);
+ int32_t op_ret = dict_get_ptr_and_len(dict, GF_XATTR_LOCKINFO_KEY,
+ &lockinfo_buf, &len);
if (lockinfo_buf == NULL) {
goto usual;
}
@@ -1564,12 +1778,17 @@ pl_fsetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict,
usual:
PL_LOCAL_GET_REQUESTS(frame, this, xdata, fd, NULL, NULL);
+
+ PL_CHECK_LOCK_ENFORCE_KEY(frame, dict, name, this, ((loc_t *)NULL), fd,
+ priv);
+
STACK_WIND(frame, pl_fsetxattr_cbk, FIRST_CHILD(this),
FIRST_CHILD(this)->fops->fsetxattr, fd, dict, flags, xdata);
return 0;
unwind:
- STACK_UNWIND_STRICT(fsetxattr, frame, op_ret, op_errno, NULL);
+ PL_STACK_UNWIND_FOR_CLIENT(fsetxattr, xdata, frame, op_ret, op_errno, NULL);
+
return 0;
}
@@ -1617,10 +1836,7 @@ pl_flush_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
int
pl_flush(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
{
- pl_inode_t *pl_inode = NULL;
-
- pl_inode = pl_inode_get(this, fd->inode);
-
+ pl_inode_t *pl_inode = pl_inode_get(this, fd->inode, NULL);
if (!pl_inode) {
gf_log(this->name, GF_LOG_DEBUG, "Could not get inode.");
STACK_UNWIND_STRICT(flush, frame, -1, EBADFD, NULL);
@@ -1696,14 +1912,12 @@ pl_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
int op_errno = EINVAL;
pl_inode_t *pl_inode = NULL;
posix_lock_t *l = NULL;
- posix_locks_private_t *priv = NULL;
-
- priv = this->private;
+ posix_locks_private_t *priv = this->private;
GF_VALIDATE_OR_GOTO("locks", this, unwind);
op_ret = 0, op_errno = 0;
- pl_inode = pl_inode_get(this, fd->inode);
+ pl_inode = pl_inode_get(this, fd->inode, NULL);
if (!pl_inode) {
gf_msg(this->name, GF_LOG_ERROR, 0, ENOMEM, "Could not get inode");
op_ret = -1;
@@ -1784,7 +1998,8 @@ int
pl_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata)
{
- PL_LOCAL_GET_REQUESTS(frame, this, xdata, NULL, loc, NULL);
+ PL_LOCAL_GET_REQUESTS(frame, this, xdata, fd, NULL, NULL);
+
STACK_WIND(frame, pl_create_cbk, FIRST_CHILD(this),
FIRST_CHILD(this)->fops->create, loc, flags, mode, umask, fd,
xdata);
@@ -1796,6 +2011,8 @@ pl_readv_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
int32_t op_errno, struct iovec *vector, int32_t count,
struct iatt *stbuf, struct iobref *iobref, dict_t *xdata)
{
+ pl_track_io_fop_count(frame->local, this, DECREMENT);
+
PL_STACK_UNWIND(readv, xdata, frame, op_ret, op_errno, vector, count, stbuf,
iobref, xdata);
@@ -1807,6 +2024,8 @@ pl_writev_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
int32_t op_errno, struct iatt *prebuf, struct iatt *postbuf,
dict_t *xdata)
{
+ pl_track_io_fop_count(frame->local, this, DECREMENT);
+
PL_STACK_UNWIND(writev, xdata, frame, op_ret, op_errno, prebuf, postbuf,
xdata);
@@ -1829,6 +2048,10 @@ do_blocked_rw(pl_inode_t *pl_inode)
if (__rw_allowable(pl_inode, &rw->region, rw->stub->fop)) {
list_del_init(&rw->list);
list_add_tail(&rw->list, &wind_list);
+ if (pl_inode->mlock_enforced &&
+ pl_inode->track_fop_wind_count) {
+ pl_inode->fop_wind_count++;
+ }
}
}
}
@@ -1844,14 +2067,68 @@ do_blocked_rw(pl_inode_t *pl_inode)
return;
}
+/* When a mandatory lock is enforced:
+   an I/O request on a region that falls outside the boundary of the
+   granted mandatory lock is rejected.
+
+   Note: I/O is never blocked while a mandatory lock is enforced, because
+   the request may carry stale data from an old client.
+ */
+
+/* Return _gf_true when the range of 'new' (fl_start..fl_end) lies entirely
+   inside the range of 'existing', with both boundaries allowed to coincide;
+   return _gf_false otherwise. */
+static gf_boolean_t
+within_range(posix_lock_t *existing, posix_lock_t *new)
+{
+    if (existing->fl_start <= new->fl_start && existing->fl_end >= new->fl_end)
+        return _gf_true;
+
+    return _gf_false;
+}
+
static int
__rw_allowable(pl_inode_t *pl_inode, posix_lock_t *region, glusterfs_fop_t op)
{
posix_lock_t *l = NULL;
- posix_locks_private_t *priv = NULL;
+ posix_locks_private_t *priv = THIS->private;
int ret = 1;
- priv = THIS->private;
+ if (pl_inode->mlock_enforced) {
+ list_for_each_entry(l, &pl_inode->ext_list, list)
+ {
+ /*
+ with lock enforced (fencing) there should not be any blocking
+ lock coexisting.
+ */
+ if (same_owner(l, region)) {
+ /* Should range check be strict for same owner with fencing? */
+ if (locks_overlap(l, region)) {
+ if (within_range(l, region)) {
+ return 1;
+ } else {
+ /*
+ Should we allow read fop if it does not fit it in the
+ range?
+ if (op == GF_FOP_READ && l->fl_type != F_WRLCK) {
+ return 1;
+ }
+ */
+ return 0;
+ }
+ }
+ } else {
+ if (locks_overlap(l, region)) {
+ /*
+ with fencing should a read from a different owner be
+ allowed if the mandatory lock taken is F_RDLCK?
+ if (op == GF_FOP_READ && l->fl_type != F_WRLCK) {
+ return 1;
+ }
+ */
+ return 0;
+ }
+ }
+ }
+
+ /* No lock has been taken by this owner */
+ return 0;
+ }
list_for_each_entry(l, &pl_inode->ext_list, list)
{
@@ -1875,6 +2152,8 @@ int
pl_readv_cont(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
off_t offset, uint32_t flags, dict_t *xdata)
{
+ pl_track_io_fop_count(frame->local, this, INCREMENT);
+
STACK_WIND(frame, pl_readv_cbk, FIRST_CHILD(this),
FIRST_CHILD(this)->fops->readv, fd, size, offset, flags, xdata);
@@ -1885,6 +2164,7 @@ int
pl_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
off_t offset, uint32_t flags, dict_t *xdata)
{
+ pl_local_t *local = NULL;
pl_inode_t *pl_inode = NULL;
pl_rw_req_t *rw = NULL;
posix_lock_t region = {
@@ -1901,18 +2181,26 @@ pl_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
GF_VALIDATE_OR_GOTO("locks", this, unwind);
- pl_inode = pl_inode_get(this, fd->inode);
+ PL_LOCAL_GET_REQUESTS(frame, this, xdata, fd, NULL, NULL);
+
+ if (!frame->local) {
+ frame->local = mem_get0(this->local_pool);
+ local = frame->local;
+ local->inode = inode_ref(fd->inode);
+ local->fd = fd_ref(fd);
+ }
+
+ pl_inode = pl_inode_get(this, fd->inode, local);
if (!pl_inode) {
op_ret = -1;
op_errno = ENOMEM;
goto unwind;
}
- PL_LOCAL_GET_REQUESTS(frame, this, xdata, fd, NULL, NULL);
- enabled = pl_is_mandatory_locking_enabled(pl_inode);
-
if (frame->root->pid < 0)
enabled = _gf_false;
+ else
+ enabled = pl_is_mandatory_locking_enabled(pl_inode);
if (enabled) {
region.fl_start = offset;
@@ -1926,15 +2214,19 @@ pl_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
{
allowed = pl_is_fop_allowed(pl_inode, &region, fd, GF_FOP_READ,
&can_block);
- if (allowed == 1)
+ if (allowed == 1) {
+ if (pl_inode->mlock_enforced &&
+ pl_inode->track_fop_wind_count) {
+ pl_inode->fop_wind_count++;
+ }
goto unlock;
- else if (!can_block) {
+ } else if (!can_block) {
op_errno = EAGAIN;
op_ret = -1;
goto unlock;
}
- rw = GF_CALLOC(1, sizeof(*rw), gf_locks_mt_pl_rw_req_t);
+ rw = GF_MALLOC(sizeof(*rw), gf_locks_mt_pl_rw_req_t);
if (!rw) {
op_errno = ENOMEM;
op_ret = -1;
@@ -1965,8 +2257,8 @@ pl_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
}
unwind:
if (op_ret == -1)
- STACK_UNWIND_STRICT(readv, frame, op_ret, op_errno, NULL, 0, NULL, NULL,
- NULL);
+ PL_STACK_UNWIND(readv, xdata, frame, op_ret, op_errno, NULL, 0, NULL,
+ NULL, NULL);
return 0;
}
@@ -1976,6 +2268,8 @@ pl_writev_cont(call_frame_t *frame, xlator_t *this, fd_t *fd,
struct iovec *vector, int count, off_t offset, uint32_t flags,
struct iobref *iobref, dict_t *xdata)
{
+ pl_track_io_fop_count(frame->local, this, INCREMENT);
+
STACK_WIND(frame, pl_writev_cbk, FIRST_CHILD(this),
FIRST_CHILD(this)->fops->writev, fd, vector, count, offset,
flags, iobref, xdata);
@@ -1988,6 +2282,7 @@ pl_writev(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *vector,
int32_t count, off_t offset, uint32_t flags, struct iobref *iobref,
dict_t *xdata)
{
+ pl_local_t *local = NULL;
pl_inode_t *pl_inode = NULL;
pl_rw_req_t *rw = NULL;
posix_lock_t region = {
@@ -2004,18 +2299,26 @@ pl_writev(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *vector,
GF_VALIDATE_OR_GOTO("locks", this, unwind);
- pl_inode = pl_inode_get(this, fd->inode);
+ PL_LOCAL_GET_REQUESTS(frame, this, xdata, fd, NULL, NULL);
+
+ if (!frame->local) {
+ frame->local = mem_get0(this->local_pool);
+ local = frame->local;
+ local->inode = inode_ref(fd->inode);
+ local->fd = fd_ref(fd);
+ }
+
+ pl_inode = pl_inode_get(this, fd->inode, local);
if (!pl_inode) {
op_ret = -1;
op_errno = ENOMEM;
goto unwind;
}
- PL_LOCAL_GET_REQUESTS(frame, this, xdata, fd, NULL, NULL);
- enabled = pl_is_mandatory_locking_enabled(pl_inode);
-
if (frame->root->pid < 0)
enabled = _gf_false;
+ else
+ enabled = pl_is_mandatory_locking_enabled(pl_inode);
if (enabled) {
region.fl_start = offset;
@@ -2029,15 +2332,24 @@ pl_writev(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *vector,
{
allowed = pl_is_fop_allowed(pl_inode, &region, fd, GF_FOP_WRITE,
&can_block);
- if (allowed == 1)
+ if (allowed == 1) {
+ if (pl_inode->mlock_enforced &&
+ pl_inode->track_fop_wind_count) {
+ pl_inode->fop_wind_count++;
+ }
goto unlock;
- else if (!can_block) {
- op_errno = EAGAIN;
+ } else if (!can_block) {
+ if (pl_inode->mlock_enforced) {
+ op_errno = EBUSY;
+ } else {
+ op_errno = EAGAIN;
+ }
+
op_ret = -1;
goto unlock;
}
- rw = GF_CALLOC(1, sizeof(*rw), gf_locks_mt_pl_rw_req_t);
+ rw = GF_MALLOC(sizeof(*rw), gf_locks_mt_pl_rw_req_t);
if (!rw) {
op_errno = ENOMEM;
op_ret = -1;
@@ -2068,7 +2380,8 @@ pl_writev(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *vector,
}
unwind:
if (op_ret == -1)
- STACK_UNWIND_STRICT(writev, frame, op_ret, op_errno, NULL, NULL, NULL);
+ PL_STACK_UNWIND(writev, xdata, frame, op_ret, op_errno, NULL, NULL,
+ NULL);
return 0;
}
@@ -2076,29 +2389,25 @@ unwind:
static int
__fd_has_locks(pl_inode_t *pl_inode, fd_t *fd)
{
- int found = 0;
posix_lock_t *l = NULL;
list_for_each_entry(l, &pl_inode->ext_list, list)
{
if (l->fd_num == fd_to_fdnum(fd)) {
- found = 1;
- break;
+ return 1;
}
}
- return found;
+ return 0;
}
static posix_lock_t *
lock_dup(posix_lock_t *lock)
{
- posix_lock_t *new_lock = NULL;
-
- new_lock = new_posix_lock(&lock->user_flock, lock->client, lock->client_pid,
- &lock->owner, (fd_t *)lock->fd_num,
- lock->lk_flags, lock->blocking);
- return new_lock;
+ int32_t op_errno = 0;
+ return new_posix_lock(&lock->user_flock, lock->client, lock->client_pid,
+ &lock->owner, (fd_t *)lock->fd_num, lock->lk_flags,
+ lock->blocking, &op_errno);
}
static int
@@ -2127,14 +2436,7 @@ __dup_locks_to_fdctx(pl_inode_t *pl_inode, fd_t *fd, pl_fdctx_t *fdctx)
static int
__copy_locks_to_fdctx(pl_inode_t *pl_inode, fd_t *fd, pl_fdctx_t *fdctx)
{
- int ret = 0;
-
- ret = __dup_locks_to_fdctx(pl_inode, fd, fdctx);
- if (ret)
- goto out;
-
-out:
- return ret;
+ return __dup_locks_to_fdctx(pl_inode, fd, fdctx);
}
static void
@@ -2205,9 +2507,10 @@ pl_getlk_fd(xlator_t *this, pl_inode_t *pl_inode, fd_t *fd,
pthread_mutex_lock(&pl_inode->mutex);
{
if (!__fd_has_locks(pl_inode, fd)) {
+ pthread_mutex_unlock(&pl_inode->mutex);
gf_log(this->name, GF_LOG_DEBUG, "fd=%p has no active locks", fd);
ret = 0;
- goto unlock;
+ goto out;
}
gf_log(this->name, GF_LOG_DEBUG, "There are active locks on fd");
@@ -2231,15 +2534,17 @@ pl_getlk_fd(xlator_t *this, pl_inode_t *pl_inode, fd_t *fd,
"fdctx present -> returning the next lock");
ret = __set_next_lock_fd(fdctx, reqlock);
if (ret) {
+ pthread_mutex_unlock(&pl_inode->mutex);
gf_log(this->name, GF_LOG_DEBUG,
"could not get next lock of fd");
- goto unlock;
+ goto out;
}
}
}
unlock:
pthread_mutex_unlock(&pl_inode->mutex);
+out:
return ret;
}
@@ -2252,12 +2557,10 @@ pl_metalock_is_active(pl_inode_t *pl_inode)
return 1;
}
-int
-__pl_queue_lock(pl_inode_t *pl_inode, posix_lock_t *reqlock, int can_block)
+void
+__pl_queue_lock(pl_inode_t *pl_inode, posix_lock_t *reqlock)
{
list_add_tail(&reqlock->list, &pl_inode->queued_locks);
-
- return 0;
}
int
@@ -2270,13 +2573,12 @@ pl_lk(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd,
int can_block = 0;
posix_lock_t *reqlock = NULL;
posix_lock_t *conf = NULL;
- int ret = 0;
uint32_t lk_flags = 0;
- posix_locks_private_t *priv = NULL;
-
- priv = this->private;
+ posix_locks_private_t *priv = this->private;
+ pl_local_t *local = NULL;
+ short lock_type = 0;
- ret = dict_get_uint32(xdata, GF_LOCK_MODE, &lk_flags);
+ int ret = dict_get_uint32(xdata, GF_LOCK_MODE, &lk_flags);
if (ret == 0) {
if (priv->mandatory_mode == MLK_NONE)
gf_log(this->name, GF_LOG_DEBUG,
@@ -2305,7 +2607,17 @@ pl_lk(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd,
flock->l_len = labs(flock->l_len);
}
- pl_inode = pl_inode_get(this, fd->inode);
+ local = mem_get0(this->local_pool);
+ if (!local) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto unwind;
+ } else {
+ frame->local = local;
+ local->fd = fd_ref(fd);
+ }
+
+ pl_inode = pl_inode_get(this, fd->inode, local);
if (!pl_inode) {
op_ret = -1;
op_errno = ENOMEM;
@@ -2313,11 +2625,11 @@ pl_lk(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd,
}
reqlock = new_posix_lock(flock, frame->root->client, frame->root->pid,
- &frame->root->lk_owner, fd, lk_flags, can_block);
+ &frame->root->lk_owner, fd, lk_flags, can_block,
+ &op_errno);
if (!reqlock) {
op_ret = -1;
- op_errno = ENOMEM;
goto unwind;
}
@@ -2409,6 +2721,7 @@ pl_lk(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd,
case F_SETLK:
reqlock->frame = frame;
reqlock->this = this;
+ lock_type = flock->l_type;
pthread_mutex_lock(&pl_inode->mutex);
{
@@ -2430,10 +2743,23 @@ pl_lk(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd,
goto out;
}
+ if (reqlock->fl_type != F_UNLCK && pl_inode->mlock_enforced) {
+ ret = pl_lock_preempt(pl_inode, reqlock);
+ if (ret == -1) {
+ gf_log(this->name, GF_LOG_ERROR, "lock preempt failed");
+ op_ret = -1;
+ op_errno = EAGAIN;
+ __destroy_lock(reqlock);
+ goto out;
+ }
+
+ pl_trace_block(this, frame, fd, NULL, cmd, flock, NULL);
+ goto unwind;
+ }
+
ret = pl_setlk(this, pl_inode, reqlock, can_block);
if (ret == -1) {
- if ((can_block) && (F_UNLCK != flock->l_type)) {
- pl_trace_block(this, frame, fd, NULL, cmd, flock, NULL);
+ if ((can_block) && (F_UNLCK != lock_type)) {
goto out;
}
gf_log(this->name, GF_LOG_DEBUG, "returning EAGAIN");
@@ -2455,7 +2781,7 @@ unwind:
pl_trace_out(this, frame, fd, NULL, cmd, flock, op_ret, op_errno, NULL);
pl_update_refkeeper(this, fd->inode);
- STACK_UNWIND_STRICT(lk, frame, op_ret, op_errno, flock, xdata);
+ PL_STACK_UNWIND(lk, xdata, frame, op_ret, op_errno, flock, xdata);
out:
return 0;
}
@@ -2488,7 +2814,7 @@ pl_forget(xlator_t *this, inode_t *inode)
INIT_LIST_HEAD(&inodelks_released);
INIT_LIST_HEAD(&entrylks_released);
- pl_inode = pl_inode_get(this, inode);
+ pl_inode = pl_inode_get(this, inode, NULL);
if (!pl_inode)
return 0;
@@ -2562,25 +2888,33 @@ pl_forget(xlator_t *this, inode_t *inode)
}
pthread_mutex_unlock(&pl_inode->mutex);
- list_for_each_entry_safe(ext_l, ext_tmp, &posixlks_released, list)
- {
- STACK_UNWIND_STRICT(lk, ext_l->frame, -1, 0, &ext_l->user_flock, NULL);
- __destroy_lock(ext_l);
+ if (!list_empty(&posixlks_released)) {
+ list_for_each_entry_safe(ext_l, ext_tmp, &posixlks_released, list)
+ {
+ STACK_UNWIND_STRICT(lk, ext_l->frame, -1, 0, &ext_l->user_flock,
+ NULL);
+ __destroy_lock(ext_l);
+ }
}
- list_for_each_entry_safe(ino_l, ino_tmp, &inodelks_released, blocked_locks)
- {
- STACK_UNWIND_STRICT(inodelk, ino_l->frame, -1, 0, NULL);
- __pl_inodelk_unref(ino_l);
+ if (!list_empty(&inodelks_released)) {
+ list_for_each_entry_safe(ino_l, ino_tmp, &inodelks_released,
+ blocked_locks)
+ {
+ STACK_UNWIND_STRICT(inodelk, ino_l->frame, -1, 0, NULL);
+ __pl_inodelk_unref(ino_l);
+ }
}
- list_for_each_entry_safe(entry_l, entry_tmp, &entrylks_released,
- blocked_locks)
- {
- STACK_UNWIND_STRICT(entrylk, entry_l->frame, -1, 0, NULL);
- GF_FREE((char *)entry_l->basename);
- GF_FREE(entry_l->connection_id);
- GF_FREE(entry_l);
+ if (!list_empty(&entrylks_released)) {
+ list_for_each_entry_safe(entry_l, entry_tmp, &entrylks_released,
+ blocked_locks)
+ {
+ STACK_UNWIND_STRICT(entrylk, entry_l->frame, -1, 0, NULL);
+ GF_FREE((char *)entry_l->basename);
+ GF_FREE(entry_l->connection_id);
+ GF_FREE(entry_l);
+ }
}
pthread_mutex_destroy(&pl_inode->mutex);
@@ -2654,11 +2988,85 @@ out:
return ret;
}
+static int32_t
+pl_request_link_count(dict_t **pxdata)
+{
+ dict_t *xdata;
+
+ xdata = *pxdata;
+ if (xdata == NULL) {
+ xdata = dict_new();
+ if (xdata == NULL) {
+ return ENOMEM;
+ }
+ } else {
+ dict_ref(xdata);
+ }
+
+ if (dict_set_uint32(xdata, GET_LINK_COUNT, 0) != 0) {
+ dict_unref(xdata);
+ return ENOMEM;
+ }
+
+ *pxdata = xdata;
+
+ return 0;
+}
+
+static int32_t
+pl_check_link_count(dict_t *xdata)
+{
+ int32_t count;
+
+ /* In case we are unable to read the link count from xdata, we take a
+ * conservative approach and return -2, which will prevent the inode from
+ * being considered deleted. In fact it will cause link tracking for this
+ * inode to be disabled completely to avoid races. */
+
+ if (xdata == NULL) {
+ return -2;
+ }
+
+ if (dict_get_int32(xdata, GET_LINK_COUNT, &count) != 0) {
+ return -2;
+ }
+
+ return count;
+}
+
int32_t
pl_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
int32_t op_errno, inode_t *inode, struct iatt *buf, dict_t *xdata,
struct iatt *postparent)
{
+ pl_inode_t *pl_inode;
+
+ if (op_ret >= 0) {
+ pl_inode = pl_inode_get(this, inode, NULL);
+ if (pl_inode == NULL) {
+ PL_STACK_UNWIND(lookup, xdata, frame, -1, ENOMEM, NULL, NULL, NULL,
+ NULL);
+ return 0;
+ }
+
+ pthread_mutex_lock(&pl_inode->mutex);
+
+ /* We only update the link count if we previously didn't know it.
+ * Doing it always can lead to races since lookup is not executed
+ * atomically most of the times. */
+ if (pl_inode->links == -2) {
+ pl_inode->links = pl_check_link_count(xdata);
+ if (buf->ia_type == IA_IFDIR) {
+ /* Directories have at least 2 links. To avoid special handling
+ * for directories, we simply decrement the value here to make
+ * them equivalent to regular files. */
+ pl_inode->links--;
+ }
+ }
+
+ pthread_mutex_unlock(&pl_inode->mutex);
+ }
+
PL_STACK_UNWIND(lookup, xdata, frame, op_ret, op_errno, inode, buf, xdata,
postparent);
return 0;
@@ -2667,9 +3075,17 @@ pl_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
int32_t
pl_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
{
- PL_LOCAL_GET_REQUESTS(frame, this, xdata, NULL, loc, NULL);
- STACK_WIND(frame, pl_lookup_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->lookup, loc, xdata);
+ int32_t error;
+
+ error = pl_request_link_count(&xdata);
+ if (error == 0) {
+ PL_LOCAL_GET_REQUESTS(frame, this, xdata, ((fd_t *)NULL), loc, NULL);
+ STACK_WIND(frame, pl_lookup_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->lookup, loc, xdata);
+ dict_unref(xdata);
+ } else {
+ STACK_UNWIND_STRICT(lookup, frame, -1, error, NULL, NULL, NULL, NULL);
+ }
return 0;
}
@@ -2730,9 +3146,8 @@ pl_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
lock_migration_info_t *
gf_mig_info_for_lock(posix_lock_t *lock)
{
- lock_migration_info_t *new = NULL;
-
- new = GF_CALLOC(1, sizeof(lock_migration_info_t), gf_common_mt_lock_mig);
+ lock_migration_info_t *new = GF_MALLOC(sizeof(lock_migration_info_t),
+ gf_common_mt_lock_mig);
if (new == NULL) {
goto out;
}
@@ -2760,7 +3175,7 @@ pl_fill_active_locks(pl_inode_t *pl_inode, lock_migration_info_t *lmi)
{
if (list_empty(&pl_inode->ext_list)) {
count = 0;
- goto out;
+ goto unlock;
}
list_for_each_entry(temp, &pl_inode->ext_list, list)
@@ -2770,6 +3185,7 @@ pl_fill_active_locks(pl_inode_t *pl_inode, lock_migration_info_t *lmi)
newlock = gf_mig_info_for_lock(temp);
if (!newlock) {
+ pthread_mutex_unlock(&pl_inode->mutex);
gf_msg(THIS->name, GF_LOG_ERROR, 0, 0, "lock_dup failed");
count = -1;
goto out;
@@ -2780,8 +3196,9 @@ pl_fill_active_locks(pl_inode_t *pl_inode, lock_migration_info_t *lmi)
}
}
-out:
+unlock:
pthread_mutex_unlock(&pl_inode->mutex);
+out:
return count;
}
@@ -2797,7 +3214,7 @@ pl_getactivelk(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
INIT_LIST_HEAD(&locks.list);
- pl_inode = pl_inode_get(this, loc->inode);
+ pl_inode = pl_inode_get(this, loc->inode, NULL);
if (!pl_inode) {
gf_msg(this->name, GF_LOG_ERROR, 0, 0, "pl_inode_get failed");
@@ -2837,9 +3254,8 @@ __pl_metalk_ref(pl_meta_lock_t *lock)
pl_meta_lock_t *
new_meta_lock(call_frame_t *frame, xlator_t *this)
{
- pl_meta_lock_t *lock = NULL;
-
- lock = GF_CALLOC(1, sizeof(*lock), gf_locks_mt_pl_meta_lock_t);
+ pl_meta_lock_t *lock = GF_CALLOC(1, sizeof(*lock),
+ gf_locks_mt_pl_meta_lock_t);
if (!lock) {
gf_msg(this->name, GF_LOG_ERROR, 0, ENOMEM,
@@ -2913,7 +3329,7 @@ pl_metalk(call_frame_t *frame, xlator_t *this, inode_t *inode)
pl_meta_lock_t *reqlk = NULL;
pl_ctx_t *ctx = NULL;
- pl_inode = pl_inode_get(this, inode);
+ pl_inode = pl_inode_get(this, inode, NULL);
if (!pl_inode) {
gf_msg(this->name, GF_LOG_ERROR, 0, ENOMEM,
"pl_inode mem allocation failedd");
@@ -2987,9 +3403,8 @@ out:
return ret;
}
-void
-__unwind_queued_locks(xlator_t *this, pl_inode_t *pl_inode,
- struct list_head *tmp_list)
+static void
+__unwind_queued_locks(pl_inode_t *pl_inode, struct list_head *tmp_list)
{
if (list_empty(&pl_inode->queued_locks))
return;
@@ -2997,9 +3412,8 @@ __unwind_queued_locks(xlator_t *this, pl_inode_t *pl_inode,
list_splice_init(&pl_inode->queued_locks, tmp_list);
}
-void
-__unwind_blocked_locks(xlator_t *this, pl_inode_t *pl_inode,
- struct list_head *tmp_list)
+static void
+__unwind_blocked_locks(pl_inode_t *pl_inode, struct list_head *tmp_list)
{
posix_lock_t *lock = NULL;
posix_lock_t *tmp = NULL;
@@ -3047,7 +3461,7 @@ pl_metaunlock(call_frame_t *frame, xlator_t *this, inode_t *inode, dict_t *dict)
goto out;
}
- pl_inode = pl_inode_get(this, inode);
+ pl_inode = pl_inode_get(this, inode, NULL);
if (!pl_inode) {
ret = -1;
goto out;
@@ -3058,12 +3472,12 @@ pl_metaunlock(call_frame_t *frame, xlator_t *this, inode_t *inode, dict_t *dict)
pthread_mutex_lock(&pl_inode->mutex);
{
/* Unwind queued locks regardless of migration status */
- __unwind_queued_locks(this, pl_inode, &tmp_posixlk_list);
+ __unwind_queued_locks(pl_inode, &tmp_posixlk_list);
/* Unwind blocked locks only for successful migration */
- if (dict_get(dict, "status")) {
+ if (dict_get_sizen(dict, "status")) {
/* unwind all blocked locks */
- __unwind_blocked_locks(this, pl_inode, &tmp_posixlk_list);
+ __unwind_blocked_locks(pl_inode, &tmp_posixlk_list);
}
/* unlock metalk */
@@ -3090,7 +3504,7 @@ pl_metaunlock(call_frame_t *frame, xlator_t *this, inode_t *inode, dict_t *dict)
inode_unref(pl_inode->inode);
}
- if (dict_get(dict, "status"))
+ if (dict_get_sizen(dict, "status"))
pl_inode->migrated = _gf_true;
else
pl_inode->migrated = _gf_false;
@@ -3119,6 +3533,34 @@ int32_t
pl_setxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
+ pl_local_t *local = NULL;
+ pl_inode_t *pl_inode = NULL;
+ local = frame->local;
+ if (local && local->update_mlock_enforced_flag && op_ret != -1) {
+ pl_inode = pl_inode_get(this, local->inode, NULL);
+ if (!pl_inode) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto unwind;
+ }
+
+ pthread_mutex_lock(&pl_inode->mutex);
+ {
+ while (pl_inode->fop_wind_count > 0) {
+ gf_msg(this->name, GF_LOG_INFO, 0, 0,
+ "waiting for existing fops (count %d) to drain for "
+ "gfid %s",
+ pl_inode->fop_wind_count, uuid_utoa(pl_inode->gfid));
+ pthread_cond_wait(&pl_inode->check_fop_wind_count,
+ &pl_inode->mutex);
+ }
+ pl_inode->mlock_enforced = _gf_true;
+ pl_inode->check_mlock_info = _gf_false;
+ }
+ pthread_mutex_unlock(&pl_inode->mutex);
+ }
+
+unwind:
PL_STACK_UNWIND_FOR_CLIENT(setxattr, xdata, frame, op_ret, op_errno, xdata);
return 0;
}
@@ -3130,15 +3572,16 @@ pl_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict,
int op_ret = 0;
int op_errno = EINVAL;
dict_t *xdata_rsp = NULL;
+ char *name = NULL;
+ posix_locks_private_t *priv = this->private;
- PL_LOCAL_GET_REQUESTS(frame, this, xdata, NULL, loc, NULL);
+ PL_LOCAL_GET_REQUESTS(frame, this, xdata, ((fd_t *)NULL), loc, NULL);
- if (dict_get(dict, GF_META_LOCK_KEY)) {
+ if (dict_get_sizen(dict, GF_META_LOCK_KEY)) {
op_ret = pl_metalk(frame, this, loc->inode);
- } else if (dict_get(dict, GF_META_UNLOCK_KEY)) {
+ } else if (dict_get_sizen(dict, GF_META_UNLOCK_KEY)) {
op_ret = pl_metaunlock(frame, this, loc->inode, dict);
-
} else {
goto usual;
}
@@ -3148,9 +3591,17 @@ pl_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict,
return 0;
usual:
+ PL_CHECK_LOCK_ENFORCE_KEY(frame, dict, name, this, loc, ((fd_t *)NULL),
+ priv);
+
STACK_WIND(frame, pl_setxattr_cbk, FIRST_CHILD(this),
FIRST_CHILD(this)->fops->setxattr, loc, dict, flags, xdata);
return 0;
+
+unwind:
+ PL_STACK_UNWIND_FOR_CLIENT(setxattr, xdata, frame, op_ret, op_errno, xdata);
+
+ return 0;
}
void
@@ -3159,10 +3610,10 @@ pl_dump_lock(char *str, int size, struct gf_flock *flock, gf_lkowner_t *owner,
time_t *blkd_time, gf_boolean_t active)
{
char *type_str = NULL;
- char granted[256] = {
+ char granted[GF_TIMESTR_SIZE] = {
0,
};
- char blocked[256] = {
+ char blocked[GF_TIMESTR_SIZE] = {
0,
};
@@ -3213,10 +3664,10 @@ __dump_entrylks(pl_inode_t *pl_inode)
{
pl_dom_list_t *dom = NULL;
pl_entry_lock_t *lock = NULL;
- char blocked[256] = {
+ char blocked[GF_TIMESTR_SIZE] = {
0,
};
- char granted[256] = {
+ char granted[GF_TIMESTR_SIZE] = {
0,
};
int count = 0;
@@ -3236,10 +3687,10 @@ __dump_entrylks(pl_inode_t *pl_inode)
list_for_each_entry(lock, &dom->entrylk_list, domain_list)
{
- gf_time_fmt(granted, sizeof(granted), lock->granted_time.tv_sec,
+ gf_time_fmt(granted, sizeof(granted), lock->granted_time,
gf_timefmt_FT);
gf_proc_dump_build_key(key, k, "entrylk[%d](ACTIVE)", count);
- if (lock->blkd_time.tv_sec == 0) {
+ if (lock->blkd_time == 0) {
snprintf(tmp, sizeof(tmp), ENTRY_GRNTD_FMT,
lock->type == ENTRYLK_RDLCK ? "ENTRYLK_RDLCK"
: "ENTRYLK_WRLCK",
@@ -3247,7 +3698,7 @@ __dump_entrylks(pl_inode_t *pl_inode)
lkowner_utoa(&lock->owner), lock->client,
lock->connection_id, granted);
} else {
- gf_time_fmt(blocked, sizeof(blocked), lock->blkd_time.tv_sec,
+ gf_time_fmt(blocked, sizeof(blocked), lock->blkd_time,
gf_timefmt_FT);
snprintf(tmp, sizeof(tmp), ENTRY_BLKD_GRNTD_FMT,
lock->type == ENTRYLK_RDLCK ? "ENTRYLK_RDLCK"
@@ -3264,7 +3715,7 @@ __dump_entrylks(pl_inode_t *pl_inode)
list_for_each_entry(lock, &dom->blocked_entrylks, blocked_locks)
{
- gf_time_fmt(blocked, sizeof(blocked), lock->blkd_time.tv_sec,
+ gf_time_fmt(blocked, sizeof(blocked), lock->blkd_time,
gf_timefmt_FT);
gf_proc_dump_build_key(key, k, "entrylk[%d](BLOCKED)", count);
@@ -3316,9 +3767,8 @@ __dump_inodelks(pl_inode_t *pl_inode)
SET_FLOCK_PID(&lock->user_flock, lock);
pl_dump_lock(tmp, sizeof(tmp), &lock->user_flock, &lock->owner,
- lock->client, lock->connection_id,
- &lock->granted_time.tv_sec, &lock->blkd_time.tv_sec,
- _gf_true);
+ lock->client, lock->connection_id, &lock->granted_time,
+ &lock->blkd_time, _gf_true);
gf_proc_dump_write(key, "%s", tmp);
count++;
@@ -3330,8 +3780,8 @@ __dump_inodelks(pl_inode_t *pl_inode)
count);
SET_FLOCK_PID(&lock->user_flock, lock);
pl_dump_lock(tmp, sizeof(tmp), &lock->user_flock, &lock->owner,
- lock->client, lock->connection_id, 0,
- &lock->blkd_time.tv_sec, _gf_false);
+ lock->client, lock->connection_id, 0, &lock->blkd_time,
+ _gf_false);
gf_proc_dump_write(key, "%s", tmp);
count++;
@@ -3364,9 +3814,8 @@ __dump_posixlks(pl_inode_t *pl_inode)
gf_proc_dump_build_key(key, "posixlk", "posixlk[%d](%s)", count,
lock->blocked ? "BLOCKED" : "ACTIVE");
pl_dump_lock(tmp, sizeof(tmp), &lock->user_flock, &lock->owner,
- lock->client, NULL, &lock->granted_time.tv_sec,
- &lock->blkd_time.tv_sec,
- (lock->blocked) ? _gf_false : _gf_true);
+ lock->client, lock->client_uid, &lock->granted_time,
+ &lock->blkd_time, (lock->blocked) ? _gf_false : _gf_true);
gf_proc_dump_write(key, "%s", tmp);
count++;
@@ -3445,11 +3894,15 @@ unlock:
__dump_inodelks(pl_inode);
}
- count = __get_posixlk_count(this, pl_inode);
+ count = __get_posixlk_count(pl_inode);
if (count) {
gf_proc_dump_write("posixlk-count", "%d", count);
__dump_posixlks(pl_inode);
}
+
+ gf_proc_dump_write("links", "%d", pl_inode->links);
+ gf_proc_dump_write("removes_pending", "%u", pl_inode->remove_running);
+ gf_proc_dump_write("removed", "%u", pl_inode->removed);
}
pthread_mutex_unlock(&pl_inode->mutex);
@@ -3558,9 +4011,9 @@ pl_metalk_client_cleanup(xlator_t *this, pl_ctx_t *ctx)
* unwind all queued and blocked locks to check
* migration status and find the correct
* destination */
- __unwind_queued_locks(this, pl_inode, &tmp_posixlk_list);
+ __unwind_queued_locks(pl_inode, &tmp_posixlk_list);
- __unwind_blocked_locks(this, pl_inode, &tmp_posixlk_list);
+ __unwind_blocked_locks(pl_inode, &tmp_posixlk_list);
list_del_init(&meta_lock->list);
@@ -3592,10 +4045,7 @@ unlock:
static int
pl_client_disconnect_cbk(xlator_t *this, client_t *client)
{
- pl_ctx_t *pl_ctx = NULL;
-
- pl_ctx = pl_ctx_get(client, this);
-
+ pl_ctx_t *pl_ctx = pl_ctx_get(client, this);
if (pl_ctx) {
pl_inodelk_client_cleanup(this, pl_ctx);
pl_entrylk_client_cleanup(this, pl_ctx);
@@ -3632,10 +4082,9 @@ pl_client_destroy_cbk(xlator_t *this, client_t *client)
int
reconfigure(xlator_t *this, dict_t *options)
{
- posix_locks_private_t *priv = NULL;
+ posix_locks_private_t *priv = this->private;
int ret = -1;
-
- priv = this->private;
+ char *tmp_str = NULL;
GF_OPTION_RECONF("trace", priv->trace, options, bool, out);
@@ -3657,6 +4106,20 @@ reconfigure(xlator_t *this, dict_t *options)
GF_OPTION_RECONF("notify-contention-delay", priv->notify_contention_delay,
options, uint32, out);
+ GF_OPTION_RECONF("mandatory-locking", tmp_str, options, str, out);
+
+ GF_OPTION_RECONF("enforce-mandatory-lock", priv->mlock_enforced, options,
+ bool, out);
+
+ if (!strcmp(tmp_str, "forced"))
+ priv->mandatory_mode = MLK_FORCED;
+ else if (!strcmp(tmp_str, "file"))
+ priv->mandatory_mode = MLK_FILE_BASED;
+ else if (!strcmp(tmp_str, "optimal"))
+ priv->mandatory_mode = MLK_OPTIMAL;
+ else
+ priv->mandatory_mode = MLK_NONE;
+
ret = 0;
out:
@@ -3704,6 +4167,7 @@ init(xlator_t *this)
priv->mandatory_mode = MLK_OPTIMAL;
else
priv->mandatory_mode = MLK_NONE;
+
tmp_str = NULL;
GF_OPTION_INIT("trace", priv->trace, bool, out);
@@ -3723,6 +4187,8 @@ init(xlator_t *this)
GF_OPTION_INIT("notify-contention-delay", priv->notify_contention_delay,
uint32, out);
+ GF_OPTION_INIT("enforce-mandatory-lock", priv->mlock_enforced, bool, out);
+
this->local_pool = mem_pool_new(pl_local_t, 32);
if (!this->local_pool) {
ret = -1;
@@ -3741,19 +4207,21 @@ out:
return ret;
}
-int
+void
fini(xlator_t *this)
{
- posix_locks_private_t *priv = NULL;
-
- priv = this->private;
+ posix_locks_private_t *priv = this->private;
if (!priv)
- return 0;
+ return;
this->private = NULL;
+ if (this->local_pool) {
+ mem_pool_destroy(this->local_pool);
+ this->local_pool = NULL;
+ }
GF_FREE(priv->brickname);
GF_FREE(priv);
- return 0;
+ return;
}
int
@@ -3780,8 +4248,11 @@ pl_rename_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
struct iatt *postoldparent, struct iatt *prenewparent,
struct iatt *postnewparent, dict_t *xdata)
{
+ pl_inode_remove_cbk(this, cookie, op_ret < 0 ? op_errno : 0);
+
PL_STACK_UNWIND(rename, xdata, frame, op_ret, op_errno, buf, preoldparent,
postoldparent, prenewparent, postnewparent, xdata);
+
return 0;
}
@@ -3789,19 +4260,23 @@ int32_t
pl_rename(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
dict_t *xdata)
{
- PL_LOCAL_GET_REQUESTS(frame, this, xdata, NULL, oldloc, newloc);
+ int32_t error;
+
+ error = PL_INODE_REMOVE(rename, frame, this, oldloc, newloc, pl_rename,
+ pl_rename_cbk, oldloc, newloc, xdata);
+ if (error > 0) {
+ STACK_UNWIND_STRICT(rename, frame, -1, error, NULL, NULL, NULL, NULL,
+ NULL, NULL);
+ }
- STACK_WIND(frame, pl_rename_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->rename, oldloc, newloc, xdata);
return 0;
}
posix_lock_t *
gf_lkmig_info_to_posix_lock(call_frame_t *frame, lock_migration_info_t *lmi)
{
- posix_lock_t *lock = NULL;
-
- lock = GF_CALLOC(1, sizeof(posix_lock_t), gf_locks_mt_posix_lock_t);
+ posix_lock_t *lock = GF_CALLOC(1, sizeof(posix_lock_t),
+ gf_locks_mt_posix_lock_t);
if (!lock)
goto out;
@@ -3849,6 +4324,7 @@ pl_write_active_locks(call_frame_t *frame, pl_inode_t *pl_inode,
/* Just making sure the activelk list is empty. Should not
* happen though*/
if (!list_empty(&pl_inode->ext_list)) {
+ pthread_mutex_unlock(&pl_inode->mutex);
gf_msg(THIS->name, GF_LOG_ERROR, 0, 0, "invalid locks found");
ret = -1;
@@ -3857,6 +4333,7 @@ pl_write_active_locks(call_frame_t *frame, pl_inode_t *pl_inode,
/* This list also should not be empty */
if (list_empty(&locklist->list)) {
+ pthread_mutex_unlock(&pl_inode->mutex);
gf_msg(THIS->name, GF_LOG_ERROR, 0, 0, "empty lock list");
ret = -1;
@@ -3867,6 +4344,7 @@ pl_write_active_locks(call_frame_t *frame, pl_inode_t *pl_inode,
{
newlock = gf_lkmig_info_to_posix_lock(frame, temp);
if (!newlock) {
+ pthread_mutex_unlock(&pl_inode->mutex);
gf_msg(THIS->name, GF_LOG_ERROR, 0, 0,
"mem allocation failed for newlock");
@@ -3876,12 +4354,10 @@ pl_write_active_locks(call_frame_t *frame, pl_inode_t *pl_inode,
list_add_tail(&newlock->list, &pl_inode->ext_list);
}
}
-
-out:
/*TODO: What if few lock add failed with ENOMEM. Should the already
* added locks be clearted */
pthread_mutex_unlock(&pl_inode->mutex);
-
+out:
return ret;
}
@@ -3889,12 +4365,11 @@ static int
pl_setactivelk(call_frame_t *frame, xlator_t *this, loc_t *loc,
lock_migration_info_t *locklist, dict_t *xdata)
{
- pl_inode_t *pl_inode = NULL;
int op_ret = 0;
int op_errno = 0;
int ret = 0;
- pl_inode = pl_inode_get(this, loc->inode);
+ pl_inode_t *pl_inode = pl_inode_get(this, loc->inode, NULL);
if (!pl_inode) {
gf_msg(this->name, GF_LOG_ERROR, 0, 0, "pl_inode_get failed");
@@ -3917,8 +4392,11 @@ pl_unlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
int32_t op_errno, struct iatt *preparent, struct iatt *postparent,
dict_t *xdata)
{
+ pl_inode_remove_cbk(this, cookie, op_ret < 0 ? op_errno : 0);
+
PL_STACK_UNWIND(unlink, xdata, frame, op_ret, op_errno, preparent,
postparent, xdata);
+
return 0;
}
@@ -3926,9 +4404,14 @@ int32_t
pl_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag,
dict_t *xdata)
{
- PL_LOCAL_GET_REQUESTS(frame, this, xdata, NULL, loc, NULL);
- STACK_WIND(frame, pl_unlink_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->unlink, loc, xflag, xdata);
+ int32_t error;
+
+ error = PL_INODE_REMOVE(unlink, frame, this, loc, NULL, pl_unlink,
+ pl_unlink_cbk, loc, xflag, xdata);
+ if (error > 0) {
+ STACK_UNWIND_STRICT(unlink, frame, -1, error, NULL, NULL, NULL);
+ }
+
return 0;
}
@@ -3946,7 +4429,7 @@ int
pl_mkdir(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
mode_t umask, dict_t *xdata)
{
- PL_LOCAL_GET_REQUESTS(frame, this, xdata, NULL, loc, NULL);
+ PL_LOCAL_GET_REQUESTS(frame, this, xdata, ((fd_t *)NULL), loc, NULL);
STACK_WIND(frame, pl_mkdir_cbk, FIRST_CHILD(this),
FIRST_CHILD(this)->fops->mkdir, loc, mode, umask, xdata);
return 0;
@@ -3964,7 +4447,7 @@ pl_stat_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
int
pl_stat(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
{
- PL_LOCAL_GET_REQUESTS(frame, this, xdata, NULL, loc, NULL);
+ PL_LOCAL_GET_REQUESTS(frame, this, xdata, ((fd_t *)NULL), loc, NULL);
STACK_WIND(frame, pl_stat_cbk, FIRST_CHILD(this),
FIRST_CHILD(this)->fops->stat, loc, xdata);
return 0;
@@ -3984,7 +4467,7 @@ int
pl_mknod(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
dev_t rdev, mode_t umask, dict_t *xdata)
{
- PL_LOCAL_GET_REQUESTS(frame, this, xdata, NULL, loc, NULL);
+ PL_LOCAL_GET_REQUESTS(frame, this, xdata, ((fd_t *)NULL), loc, NULL);
STACK_WIND(frame, pl_mknod_cbk, FIRST_CHILD(this),
FIRST_CHILD(this)->fops->mknod, loc, mode, rdev, umask, xdata);
return 0;
@@ -3995,8 +4478,11 @@ pl_rmdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
int32_t op_errno, struct iatt *preparent, struct iatt *postparent,
dict_t *xdata)
{
+ pl_inode_remove_cbk(this, cookie, op_ret < 0 ? op_errno : 0);
+
PL_STACK_UNWIND_FOR_CLIENT(rmdir, xdata, frame, op_ret, op_errno, preparent,
postparent, xdata);
+
return 0;
}
@@ -4004,9 +4490,14 @@ int
pl_rmdir(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflags,
dict_t *xdata)
{
- PL_LOCAL_GET_REQUESTS(frame, this, xdata, NULL, loc, NULL);
- STACK_WIND(frame, pl_rmdir_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->rmdir, loc, xflags, xdata);
+ int32_t error;
+
+ error = PL_INODE_REMOVE(rmdir, frame, this, loc, NULL, pl_rmdir,
+ pl_rmdir_cbk, loc, xflags, xdata);
+ if (error > 0) {
+ STACK_UNWIND_STRICT(rmdir, frame, -1, error, NULL, NULL, NULL);
+ }
+
return 0;
}
@@ -4025,7 +4516,7 @@ int
pl_symlink(call_frame_t *frame, xlator_t *this, const char *linkname,
loc_t *loc, mode_t umask, dict_t *xdata)
{
- PL_LOCAL_GET_REQUESTS(frame, this, xdata, NULL, loc, NULL);
+ PL_LOCAL_GET_REQUESTS(frame, this, xdata, ((fd_t *)NULL), loc, NULL);
STACK_WIND(frame, pl_symlink_cbk, FIRST_CHILD(this),
FIRST_CHILD(this)->fops->symlink, linkname, loc, umask, xdata);
return 0;
@@ -4036,6 +4527,19 @@ pl_link_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
int32_t op_errno, inode_t *inode, struct iatt *buf,
struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
{
+ pl_inode_t *pl_inode = (pl_inode_t *)cookie;
+
+ if (op_ret >= 0) {
+ pthread_mutex_lock(&pl_inode->mutex);
+
+ /* TODO: can happen pl_inode->links == 0 ? */
+ if (pl_inode->links >= 0) {
+ pl_inode->links++;
+ }
+
+ pthread_mutex_unlock(&pl_inode->mutex);
+ }
+
PL_STACK_UNWIND_FOR_CLIENT(link, xdata, frame, op_ret, op_errno, inode, buf,
preparent, postparent, xdata);
return 0;
@@ -4045,9 +4549,18 @@ int
pl_link(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
dict_t *xdata)
{
- PL_LOCAL_GET_REQUESTS(frame, this, xdata, NULL, oldloc, newloc);
- STACK_WIND(frame, pl_link_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->link, oldloc, newloc, xdata);
+ pl_inode_t *pl_inode;
+
+ pl_inode = pl_inode_get(this, oldloc->inode, NULL);
+ if (pl_inode == NULL) {
+ STACK_UNWIND_STRICT(link, frame, -1, ENOMEM, NULL, NULL, NULL, NULL,
+ NULL);
+ return 0;
+ }
+
+ PL_LOCAL_GET_REQUESTS(frame, this, xdata, ((fd_t *)NULL), oldloc, newloc);
+ STACK_WIND_COOKIE(frame, pl_link_cbk, pl_inode, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->link, oldloc, newloc, xdata);
return 0;
}
@@ -4121,7 +4634,7 @@ pl_statfs_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
int
pl_statfs(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
{
- PL_LOCAL_GET_REQUESTS(frame, this, xdata, NULL, loc, NULL);
+ PL_LOCAL_GET_REQUESTS(frame, this, xdata, ((fd_t *)NULL), loc, NULL);
STACK_WIND(frame, pl_statfs_cbk, FIRST_CHILD(this),
FIRST_CHILD(this)->fops->statfs, loc, xdata);
return 0;
@@ -4131,6 +4644,28 @@ int32_t
pl_removexattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
+ pl_local_t *local = NULL;
+ pl_inode_t *pl_inode = NULL;
+
+ local = frame->local;
+ if (local && local->update_mlock_enforced_flag && op_ret != -1) {
+ pl_inode = pl_inode_get(this, local->inode, NULL);
+ if (!pl_inode) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto unwind;
+ }
+
+ pthread_mutex_lock(&pl_inode->mutex);
+ {
+ pl_inode->mlock_enforced = _gf_false;
+ pl_inode->check_mlock_info = _gf_false;
+ pl_inode->track_fop_wind_count = _gf_true;
+ }
+ pthread_mutex_unlock(&pl_inode->mutex);
+ }
+
+unwind:
PL_STACK_UNWIND_FOR_CLIENT(removexattr, xdata, frame, op_ret, op_errno,
xdata);
return 0;
@@ -4140,16 +4675,51 @@ int
pl_removexattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
const char *name, dict_t *xdata)
{
- PL_LOCAL_GET_REQUESTS(frame, this, xdata, NULL, loc, NULL);
+ /* op_ret/op_errno are only consumed on the "unwind" path below. Start
+ * from a failing status, as pl_fremovexattr does, so that path can never
+ * report success while handing back a NULL xdata. */
+ int op_ret = -1;
+ int op_errno = EINVAL;
+ posix_locks_private_t *priv = this->private;
+
+ PL_LOCAL_GET_REQUESTS(frame, this, xdata, ((fd_t *)NULL), loc, NULL);
+
+ /* NOTE(review): nothing else in this function jumps to "unwind", so the
+ * jump presumably lives inside this macro -- confirm against its
+ * definition, which is not visible here. */
+ PL_CHECK_LOCK_ENFORCE_KEY(frame, ((dict_t *)NULL), name, this, loc,
+ ((fd_t *)NULL), priv);
+
STACK_WIND(frame, pl_removexattr_cbk, FIRST_CHILD(this),
FIRST_CHILD(this)->fops->removexattr, loc, name, xdata);
return 0;
+
+unwind:
+ /* Error exit: answer the caller directly without winding to the child. */
+ PL_STACK_UNWIND_FOR_CLIENT(removexattr, xdata, frame, op_ret, op_errno,
+ NULL);
+
+ return 0;
}
int32_t
pl_fremovexattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
+ pl_local_t *local = NULL;
+ pl_inode_t *pl_inode = NULL;
+
+ local = frame->local;
+ if (local && local->update_mlock_enforced_flag && op_ret != -1) {
+ pl_inode = pl_inode_get(this, local->inode, NULL);
+ if (!pl_inode) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto unwind;
+ }
+
+ pthread_mutex_lock(&pl_inode->mutex);
+ {
+ pl_inode->mlock_enforced = _gf_false;
+ pl_inode->check_mlock_info = _gf_false;
+ }
+ pthread_mutex_unlock(&pl_inode->mutex);
+ }
+
+unwind:
PL_STACK_UNWIND_FOR_CLIENT(fremovexattr, xdata, frame, op_ret, op_errno,
xdata);
return 0;
@@ -4159,10 +4729,23 @@ int
pl_fremovexattr(call_frame_t *frame, xlator_t *this, fd_t *fd, const char *name,
dict_t *xdata)
{
+ int op_ret = -1;
+ int op_errno = EINVAL;
+ posix_locks_private_t *priv = this->private;
+
PL_LOCAL_GET_REQUESTS(frame, this, xdata, fd, NULL, NULL);
+
+ PL_CHECK_LOCK_ENFORCE_KEY(frame, ((dict_t *)NULL), name, this,
+ ((loc_t *)NULL), fd, priv);
+
STACK_WIND(frame, pl_fremovexattr_cbk, FIRST_CHILD(this),
FIRST_CHILD(this)->fops->fremovexattr, fd, name, xdata);
return 0;
+
+unwind:
+ PL_STACK_UNWIND_FOR_CLIENT(fremovexattr, xdata, frame, op_ret, op_errno,
+ NULL);
+ return 0;
}
int32_t
@@ -4198,7 +4781,7 @@ int
pl_xattrop(call_frame_t *frame, xlator_t *this, loc_t *loc,
gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata)
{
- PL_LOCAL_GET_REQUESTS(frame, this, xdata, NULL, loc, NULL);
+ PL_LOCAL_GET_REQUESTS(frame, this, xdata, ((fd_t *)NULL), loc, NULL);
STACK_WIND(frame, pl_xattrop_cbk, FIRST_CHILD(this),
FIRST_CHILD(this)->fops->xattrop, loc, optype, xattr, xdata);
return 0;
@@ -4237,7 +4820,7 @@ int
pl_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc, struct iatt *stbuf,
int32_t valid, dict_t *xdata)
{
- PL_LOCAL_GET_REQUESTS(frame, this, xdata, NULL, loc, NULL);
+ PL_LOCAL_GET_REQUESTS(frame, this, xdata, ((fd_t *)NULL), loc, NULL);
STACK_WIND(frame, pl_setattr_cbk, FIRST_CHILD(this),
FIRST_CHILD(this)->fops->setattr, loc, stbuf, valid, xdata);
return 0;
@@ -4298,7 +4881,7 @@ int
pl_readlink(call_frame_t *frame, xlator_t *this, loc_t *loc, size_t size,
dict_t *xdata)
{
- PL_LOCAL_GET_REQUESTS(frame, this, xdata, NULL, loc, NULL);
+ PL_LOCAL_GET_REQUESTS(frame, this, xdata, ((fd_t *)NULL), loc, NULL);
STACK_WIND(frame, pl_readlink_cbk, FIRST_CHILD(this),
FIRST_CHILD(this)->fops->readlink, loc, size, xdata);
return 0;
@@ -4316,7 +4899,7 @@ int
pl_access(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t mask,
dict_t *xdata)
{
- PL_LOCAL_GET_REQUESTS(frame, this, xdata, NULL, loc, NULL);
+ PL_LOCAL_GET_REQUESTS(frame, this, xdata, ((fd_t *)NULL), loc, NULL);
STACK_WIND(frame, pl_access_cbk, FIRST_CHILD(this),
FIRST_CHILD(this)->fops->access, loc, mask, xdata);
return 0;
@@ -4465,7 +5048,7 @@ struct volume_options options[] = {
"be used in conjunction w/ revocation-clear-all."},
{.key = {"notify-contention"},
.type = GF_OPTION_TYPE_BOOL,
- .default_value = "no",
+ .default_value = "yes",
.flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
.op_version = {GD_OP_VERSION_4_0_0},
.tags = {"locks", "contention"},
@@ -4488,5 +5071,25 @@ struct volume_options options[] = {
"on the same inode. If multiple lock requests are "
"received during this period, only one upcall will "
"be sent."},
+ {.key = {"enforce-mandatory-lock"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ .flags = OPT_FLAG_SETTABLE,
+ .op_version = {GD_OP_VERSION_6_0},
+ .description = "option to enable lock enforcement"},
{.key = {NULL}},
};
+
+xlator_api_t xlator_api = {
+ .init = init,
+ .fini = fini,
+ .reconfigure = reconfigure,
+ .mem_acct_init = mem_acct_init,
+ .op_version = {1}, /* Present from the initial version */
+ .dumpops = &dumpops,
+ .fops = &fops,
+ .cbks = &cbks,
+ .options = options,
+ .identifier = "locks",
+ .category = GF_MAINTAINED,
+};
diff --git a/xlators/features/locks/src/reservelk.c b/xlators/features/locks/src/reservelk.c
index 8b080dba030..604691fd887 100644
--- a/xlators/features/locks/src/reservelk.c
+++ b/xlators/features/locks/src/reservelk.c
@@ -7,12 +7,12 @@
later), or the GNU General Public License, version 2 (GPLv2), in all
cases as published by the Free Software Foundation.
*/
-#include "glusterfs.h"
-#include "compat.h"
-#include "xlator.h"
-#include "logging.h"
-#include "common-utils.h"
-#include "list.h"
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/compat.h>
+#include <glusterfs/xlator.h>
+#include <glusterfs/logging.h>
+#include <glusterfs/common-utils.h>
+#include <glusterfs/list.h>
#include "locks.h"
#include "common.h"
@@ -31,12 +31,10 @@ reservelks_equal(posix_lock_t *l1, posix_lock_t *l2)
static posix_lock_t *
__reservelk_grantable(pl_inode_t *pl_inode, posix_lock_t *lock)
{
- xlator_t *this = NULL;
+ xlator_t *this = THIS;
posix_lock_t *l = NULL;
posix_lock_t *ret_lock = NULL;
- this = THIS;
-
if (list_empty(&pl_inode->reservelk_list)) {
gf_log(this->name, GF_LOG_TRACE, "No reservelks in list");
goto out;
@@ -82,10 +80,9 @@ __matching_reservelk(pl_inode_t *pl_inode, posix_lock_t *lock)
static int
__reservelk_conflict(xlator_t *this, pl_inode_t *pl_inode, posix_lock_t *lock)
{
- posix_lock_t *conf = NULL;
int ret = 0;
- conf = __matching_reservelk(pl_inode, lock);
+ posix_lock_t *conf = __matching_reservelk(pl_inode, lock);
if (conf) {
gf_log(this->name, GF_LOG_TRACE, "Matching reservelk found");
if (__same_owner_reservelk(lock, conf)) {
@@ -104,29 +101,28 @@ __reservelk_conflict(xlator_t *this, pl_inode_t *pl_inode, posix_lock_t *lock)
int
pl_verify_reservelk(xlator_t *this, pl_inode_t *pl_inode, posix_lock_t *lock,
- int can_block)
+ const int can_block)
{
int ret = 0;
pthread_mutex_lock(&pl_inode->mutex);
{
if (__reservelk_conflict(this, pl_inode, lock)) {
+ lock->blocked = can_block;
+ list_add_tail(&lock->list, &pl_inode->blocked_calls);
+ pthread_mutex_unlock(&pl_inode->mutex);
gf_log(this->name, GF_LOG_TRACE,
"Found conflicting reservelk. Blocking until reservelk is "
"unlocked.");
- lock->blocked = can_block;
- list_add_tail(&lock->list, &pl_inode->blocked_calls);
ret = -1;
- goto unlock;
+ goto out;
}
-
- gf_log(this->name, GF_LOG_TRACE,
- "no conflicting reservelk found. Call continuing");
- ret = 0;
}
-unlock:
pthread_mutex_unlock(&pl_inode->mutex);
-
+ gf_log(this->name, GF_LOG_TRACE,
+ "no conflicting reservelk found. Call continuing");
+ ret = 0;
+out:
return ret;
}
@@ -135,12 +131,11 @@ unlock:
*/
static int
__lock_reservelk(xlator_t *this, pl_inode_t *pl_inode, posix_lock_t *lock,
- int can_block)
+ const int can_block)
{
- posix_lock_t *conf = NULL;
int ret = -EINVAL;
- conf = __reservelk_grantable(pl_inode, lock);
+ posix_lock_t *conf = __reservelk_grantable(pl_inode, lock);
if (conf) {
ret = -EAGAIN;
if (can_block == 0)
@@ -183,9 +178,7 @@ find_matching_reservelk(posix_lock_t *lock, pl_inode_t *pl_inode)
static posix_lock_t *
__reserve_unlock_lock(xlator_t *this, posix_lock_t *lock, pl_inode_t *pl_inode)
{
- posix_lock_t *conf = NULL;
-
- conf = find_matching_reservelk(lock, pl_inode);
+ posix_lock_t *conf = find_matching_reservelk(lock, pl_inode);
if (!conf) {
gf_log(this->name, GF_LOG_DEBUG, " Matching lock not found for unlock");
goto out;
@@ -319,8 +312,6 @@ grant_blocked_lock_calls(xlator_t *this, pl_inode_t *pl_inode)
ret = pl_setlk(this, pl_inode, lock, can_block);
if (ret == -1) {
if (can_block) {
- pl_trace_block(this, lock->frame, fd, NULL, cmd,
- &lock->user_flock, NULL);
continue;
} else {
gf_log(this->name, GF_LOG_DEBUG, "returning EAGAIN");
@@ -345,6 +336,7 @@ pl_reserve_unlock(xlator_t *this, pl_inode_t *pl_inode, posix_lock_t *lock)
{
retlock = __reserve_unlock_lock(this, lock, pl_inode);
if (!retlock) {
+ pthread_mutex_unlock(&pl_inode->mutex);
gf_log(this->name, GF_LOG_DEBUG, "Bad Unlock issued on Inode lock");
ret = -EINVAL;
goto out;
@@ -354,9 +346,8 @@ pl_reserve_unlock(xlator_t *this, pl_inode_t *pl_inode, posix_lock_t *lock)
__destroy_lock(retlock);
ret = 0;
}
-out:
pthread_mutex_unlock(&pl_inode->mutex);
-
+out:
grant_blocked_reserve_locks(this, pl_inode);
grant_blocked_lock_calls(this, pl_inode);
@@ -372,19 +363,20 @@ pl_reserve_setlk(xlator_t *this, pl_inode_t *pl_inode, posix_lock_t *lock,
pthread_mutex_lock(&pl_inode->mutex);
{
ret = __lock_reservelk(this, pl_inode, lock, can_block);
- if (ret < 0)
- gf_log(this->name, GF_LOG_TRACE,
- "%s (pid=%d) (lk-owner=%s) %" PRId64 " - %" PRId64 " => NOK",
- lock->fl_type == F_UNLCK ? "Unlock" : "Lock",
- lock->client_pid, lkowner_utoa(&lock->owner),
- lock->user_flock.l_start, lock->user_flock.l_len);
- else
- gf_log(this->name, GF_LOG_TRACE,
- "%s (pid=%d) (lk-owner=%s) %" PRId64 " - %" PRId64 " => OK",
- lock->fl_type == F_UNLCK ? "Unlock" : "Lock",
- lock->client_pid, lkowner_utoa(&lock->owner), lock->fl_start,
- lock->fl_end);
}
pthread_mutex_unlock(&pl_inode->mutex);
+
+ if (ret < 0)
+ gf_log(this->name, GF_LOG_TRACE,
+ "%s (pid=%d) (lk-owner=%s) %" PRId64 " - %" PRId64 " => NOK",
+ lock->fl_type == F_UNLCK ? "Unlock" : "Lock", lock->client_pid,
+ lkowner_utoa(&lock->owner), lock->user_flock.l_start,
+ lock->user_flock.l_len);
+ else
+ gf_log(this->name, GF_LOG_TRACE,
+ "%s (pid=%d) (lk-owner=%s) %" PRId64 " - %" PRId64 " => OK",
+ lock->fl_type == F_UNLCK ? "Unlock" : "Lock", lock->client_pid,
+ lkowner_utoa(&lock->owner), lock->fl_start, lock->fl_end);
+
return ret;
}
diff --git a/xlators/features/locks/tests/unit-test.c b/xlators/features/locks/tests/unit-test.c
index c4759bd4a5f..d285b12b5aa 100644
--- a/xlators/features/locks/tests/unit-test.c
+++ b/xlators/features/locks/tests/unit-test.c
@@ -7,12 +7,12 @@
later), or the GNU General Public License, version 2 (GPLv2), in all
cases as published by the Free Software Foundation.
*/
-#include "glusterfs.h"
-#include "compat.h"
-#include "xlator.h"
-#include "logging.h"
-#include "common-utils.h"
-#include "list.h"
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/compat.h>
+#include <glusterfs/xlator.h>
+#include <glusterfs/logging.h>
+#include <glusterfs/common-utils.h>
+#include <glusterfs/list.h>
#include "locks.h"
#include "common.h"
diff --git a/xlators/features/marker/src/marker-common.c b/xlators/features/marker/src/marker-common.c
index 4989efb13d6..9c9047005d6 100644
--- a/xlators/features/marker/src/marker-common.c
+++ b/xlators/features/marker/src/marker-common.c
@@ -55,10 +55,3 @@ unlock:
return ret;
}
-
-int
-marker_filter_quota_xattr(dict_t *dict, char *key, data_t *value, void *data)
-{
- dict_del(dict, key);
- return 0;
-}
diff --git a/xlators/features/marker/src/marker-common.h b/xlators/features/marker/src/marker-common.h
index 449d55b5ef0..7f8cffe7d35 100644
--- a/xlators/features/marker/src/marker-common.h
+++ b/xlators/features/marker/src/marker-common.h
@@ -10,12 +10,10 @@
#ifndef _MARKER_COMMON_H
#define _MARKER_COMMON_H
-#include "xlator.h"
+#include <glusterfs/xlator.h>
#include "marker.h"
int32_t
marker_force_inode_ctx_get(inode_t *, xlator_t *, marker_inode_ctx_t **);
-int
-marker_filter_quota_xattr(dict_t *, char *, data_t *, void *);
#endif
diff --git a/xlators/features/marker/src/marker-mem-types.h b/xlators/features/marker/src/marker-mem-types.h
index 7d590d7ec84..aedfdb4a1b7 100644
--- a/xlators/features/marker/src/marker-mem-types.h
+++ b/xlators/features/marker/src/marker-mem-types.h
@@ -10,9 +10,10 @@
#ifndef __MARKER_MEM_TYPES_H__
#define __MARKER_MEM_TYPES_H__
-#include "mem-types.h"
+#include <glusterfs/mem-types.h>
enum gf_marker_mem_types_ {
+ /* These are used by the ALLOCATE_OR_GOTO macro */
gf_marker_mt_marker_conf_t = gf_common_mt_end + 1,
gf_marker_mt_loc_t,
gf_marker_mt_volume_mark,
diff --git a/xlators/features/marker/src/marker-quota-helper.c b/xlators/features/marker/src/marker-quota-helper.c
index 694493c778c..ecd85d67b2b 100644
--- a/xlators/features/marker/src/marker-quota-helper.c
+++ b/xlators/features/marker/src/marker-quota-helper.c
@@ -7,7 +7,7 @@
later), or the GNU General Public License, version 2 (GPLv2), in all
cases as published by the Free Software Foundation.
*/
-#include "locking.h"
+#include <glusterfs/locking.h>
#include "marker-quota.h"
#include "marker-common.h"
#include "marker-quota-helper.h"
@@ -378,96 +378,3 @@ mq_inode_ctx_new(inode_t *inode, xlator_t *this)
{
return __mq_inode_ctx_new(inode, this);
}
-
-quota_local_t *
-mq_local_new()
-{
- quota_local_t *local = NULL;
-
- local = mem_get0(THIS->local_pool);
- if (!local)
- goto out;
-
- local->ref = 1;
- LOCK_INIT(&local->lock);
-
- local->ctx = NULL;
- local->contri = NULL;
-
-out:
- return local;
-}
-
-quota_local_t *
-mq_local_ref(quota_local_t *local)
-{
- LOCK(&local->lock);
- {
- local->ref++;
- }
- UNLOCK(&local->lock);
-
- return local;
-}
-
-int32_t
-mq_local_unref(xlator_t *this, quota_local_t *local)
-{
- int32_t ref = 0;
- if (local == NULL)
- goto out;
-
- QUOTA_SAFE_DECREMENT(&local->lock, local->ref, ref);
-
- if (ref != 0)
- goto out;
-
- if (local->fd != NULL)
- fd_unref(local->fd);
-
- if (local->contri)
- GF_REF_PUT(local->contri);
-
- if (local->xdata)
- dict_unref(local->xdata);
-
- loc_wipe(&local->loc);
-
- loc_wipe(&local->parent_loc);
-
- LOCK_DESTROY(&local->lock);
-
- mem_put(local);
-out:
- return 0;
-}
-
-inode_contribution_t *
-mq_get_contribution_from_loc(xlator_t *this, loc_t *loc)
-{
- int32_t ret = 0;
- quota_inode_ctx_t *ctx = NULL;
- inode_contribution_t *contribution = NULL;
-
- ret = mq_inode_ctx_get(loc->inode, this, &ctx);
- if (ret < 0) {
- gf_log_callingfn(this->name, GF_LOG_WARNING,
- "cannot get marker-quota context from inode "
- "(gfid:%s, path:%s)",
- uuid_utoa(loc->inode->gfid), loc->path);
- goto err;
- }
-
- contribution = mq_get_contribution_node(loc->parent, ctx);
- if (contribution == NULL) {
- gf_log_callingfn(this->name, GF_LOG_WARNING,
- "inode (gfid:%s, path:%s) has "
- "no contribution towards parent (gfid:%s)",
- uuid_utoa(loc->inode->gfid), loc->path,
- uuid_utoa(loc->parent->gfid));
- goto err;
- }
-
-err:
- return contribution;
-}
diff --git a/xlators/features/marker/src/marker-quota-helper.h b/xlators/features/marker/src/marker-quota-helper.h
index 99723def1b9..d4091dd2180 100644
--- a/xlators/features/marker/src/marker-quota-helper.h
+++ b/xlators/features/marker/src/marker-quota-helper.h
@@ -57,22 +57,10 @@ mq_delete_contribution_node(dict_t *, char *, inode_contribution_t *);
int32_t
mq_inode_loc_fill(const char *, inode_t *, loc_t *);
-quota_local_t *
-mq_local_new();
-
-quota_local_t *
-mq_local_ref(quota_local_t *);
-
-int32_t
-mq_local_unref(xlator_t *, quota_local_t *);
-
inode_contribution_t *
mq_contri_init(inode_t *inode);
inode_contribution_t *
mq_get_contribution_node(inode_t *, quota_inode_ctx_t *);
-inode_contribution_t *
-mq_get_contribution_from_loc(xlator_t *this, loc_t *loc);
-
#endif
diff --git a/xlators/features/marker/src/marker-quota.c b/xlators/features/marker/src/marker-quota.c
index 0fc7ba66ee0..3de2ea1c92c 100644
--- a/xlators/features/marker/src/marker-quota.c
+++ b/xlators/features/marker/src/marker-quota.c
@@ -7,16 +7,16 @@
later), or the GNU General Public License, version 2 (GPLv2), in all
cases as published by the Free Software Foundation.
*/
-#include "dict.h"
-#include "xlator.h"
-#include "defaults.h"
+#include <glusterfs/dict.h>
+#include <glusterfs/xlator.h>
+#include <glusterfs/defaults.h>
#include "libxlator.h"
-#include "common-utils.h"
-#include "byte-order.h"
+#include <glusterfs/common-utils.h>
+#include <glusterfs/byte-order.h>
#include "marker-quota.h"
#include "marker-quota-helper.h"
-#include "syncop.h"
-#include "quota-common-utils.h"
+#include <glusterfs/syncop.h>
+#include <glusterfs/quota-common-utils.h>
int
mq_loc_copy(loc_t *dst, loc_t *src)
@@ -134,27 +134,14 @@ out:
return -1;
}
-int32_t
+static void
mq_set_ctx_dirty_status(quota_inode_ctx_t *ctx, gf_boolean_t status)
{
GF_VALIDATE_OR_GOTO("marker", ctx, out);
mq_set_ctx_status(ctx, &ctx->dirty_status, status);
- return 0;
-out:
- return -1;
-}
-
-int32_t
-mq_test_and_set_ctx_dirty_status(quota_inode_ctx_t *ctx, gf_boolean_t *status)
-{
- GF_VALIDATE_OR_GOTO("marker", ctx, out);
- GF_VALIDATE_OR_GOTO("marker", status, out);
-
- mq_test_and_set_ctx_status(ctx, &ctx->dirty_status, status);
- return 0;
out:
- return -1;
+ return;
}
int
@@ -866,19 +853,6 @@ out:
}
int32_t
-mq_get_size(xlator_t *this, loc_t *loc, quota_meta_t *size)
-{
- return _mq_get_metadata(this, loc, NULL, size, 0);
-}
-
-int32_t
-mq_get_contri(xlator_t *this, loc_t *loc, quota_meta_t *contri,
- uuid_t contri_gfid)
-{
- return _mq_get_metadata(this, loc, contri, NULL, contri_gfid);
-}
-
-int32_t
mq_get_delta(xlator_t *this, loc_t *loc, quota_meta_t *delta,
quota_inode_ctx_t *ctx, inode_contribution_t *contribution)
{
@@ -1337,19 +1311,6 @@ out:
return ret;
}
-int
-mq_create_xattrs_blocking_txn(xlator_t *this, loc_t *loc, struct iatt *buf)
-{
- int32_t ret = -1;
-
- GF_VALIDATE_OR_GOTO("marker", loc, out);
- GF_VALIDATE_OR_GOTO("marker", loc->inode, out);
-
- ret = _mq_create_xattrs_txn(this, loc, buf, _gf_false);
-out:
- return ret;
-}
-
int32_t
mq_reduce_parent_size_task(void *opaque)
{
@@ -1752,21 +1713,17 @@ mq_initiate_quota_task(void *opaque)
}
out:
- if (dirty) {
- if (ret < 0) {
- /* On failure clear dirty status flag.
- * In the next lookup inspect_directory_xattr
- * can set the status flag and fix the
- * dirty directory.
- * Do the same if the dir was dirty before
- * txn
- */
- ret = mq_inode_ctx_get(parent_loc.inode, this, &parent_ctx);
- if (ret == 0)
- mq_set_ctx_dirty_status(parent_ctx, _gf_false);
- } else {
- ret = mq_mark_dirty(this, &parent_loc, 0);
- }
+ if ((dirty) && (ret < 0)) {
+ /* On failure clear dirty status flag.
+ * In the next lookup inspect_directory_xattr
+ * can set the status flag and fix the
+ * dirty directory.
+ * Do the same if the dir was dirty before
+ * txn
+ */
+ ret = mq_inode_ctx_get(parent_loc.inode, this, &parent_ctx);
+ if (ret == 0)
+ mq_set_ctx_dirty_status(parent_ctx, _gf_false);
}
if (locked)
@@ -1977,7 +1934,7 @@ mq_update_dirty_inode_task(void *opaque)
/* Include for self */
contri_sum.dir_count++;
- ret = mq_get_size(this, loc, &size);
+ ret = _mq_get_metadata(this, loc, NULL, &size, 0);
if (ret < 0)
goto out;
@@ -2046,8 +2003,8 @@ mq_update_dirty_inode_txn(xlator_t *this, loc_t *loc, quota_inode_ctx_t *ctx)
GF_VALIDATE_OR_GOTO("marker", loc, out);
GF_VALIDATE_OR_GOTO("marker", loc->inode, out);
- ret = mq_test_and_set_ctx_dirty_status(ctx, &status);
- if (ret < 0 || status == _gf_true)
+ mq_test_and_set_ctx_status(ctx, &ctx->dirty_status, &status);
+ if (status == _gf_true)
goto out;
ret = mq_synctask(this, mq_update_dirty_inode_task, _gf_true, loc);
@@ -2102,6 +2059,9 @@ mq_inspect_directory_xattr(xlator_t *this, quota_inode_ctx_t *ctx,
if (ret < 0)
goto create_xattr;
+ if (!contribution)
+ goto create_xattr;
+
if (!loc_is_root(loc)) {
GET_CONTRI_KEY(this, contri_key, contribution->gfid, keylen);
if (keylen < 0) {
diff --git a/xlators/features/marker/src/marker-quota.h b/xlators/features/marker/src/marker-quota.h
index 7e24f1bc4f7..4bbf6878b22 100644
--- a/xlators/features/marker/src/marker-quota.h
+++ b/xlators/features/marker/src/marker-quota.h
@@ -10,11 +10,11 @@
#ifndef _MARKER_QUOTA_H
#define _MARKER_QUOTA_H
-#include "xlator.h"
+#include <glusterfs/xlator.h>
#include "marker-mem-types.h"
-#include "refcount.h"
-#include "quota-common-utils.h"
-#include "call-stub.h"
+#include <glusterfs/refcount.h>
+#include <glusterfs/quota-common-utils.h>
+#include <glusterfs/call-stub.h>
#define QUOTA_XATTR_PREFIX "trusted.glusterfs"
#define QUOTA_DIRTY_KEY "trusted.glusterfs.quota.dirty"
@@ -23,15 +23,6 @@
#define QUOTA_KEY_MAX 512
#define READDIR_BUF 4096
-#define QUOTA_STACK_DESTROY(_frame, _this) \
- do { \
- quota_local_t *_local = NULL; \
- _local = _frame->local; \
- _frame->local = NULL; \
- STACK_DESTROY(_frame->root); \
- mq_local_unref(_this, _local); \
- } while (0)
-
#define QUOTA_ALLOC(var, type, ret) \
do { \
ret = 0; \
diff --git a/xlators/features/marker/src/marker.c b/xlators/features/marker/src/marker.c
index 2d3177c7ec3..1375ccc498c 100644
--- a/xlators/features/marker/src/marker.c
+++ b/xlators/features/marker/src/marker.c
@@ -7,17 +7,17 @@
later), or the GNU General Public License, version 2 (GPLv2), in all
cases as published by the Free Software Foundation.
*/
-#include "xlator.h"
-#include "defaults.h"
+#include <glusterfs/xlator.h>
+#include <glusterfs/defaults.h>
#include "libxlator.h"
#include "marker.h"
#include "marker-mem-types.h"
#include "marker-quota.h"
#include "marker-quota-helper.h"
#include "marker-common.h"
-#include "byte-order.h"
-#include "syncop.h"
-#include "syscall.h"
+#include <glusterfs/byte-order.h>
+#include <glusterfs/syncop.h>
+#include <glusterfs/syscall.h>
#include <fnmatch.h>
@@ -242,24 +242,19 @@ out:
return ret;
}
-int32_t
+void
marker_error_handler(xlator_t *this, marker_local_t *local, int32_t op_errno)
{
- marker_conf_t *priv = NULL;
- const char *path = NULL;
-
- priv = (marker_conf_t *)this->private;
- path = local ? (local->loc.path ? local->loc.path
- : uuid_utoa(local->loc.gfid))
- : "<nul>";
+ marker_conf_t *priv = (marker_conf_t *)this->private;
+ const char *path = local ? ((local->loc.path) ? local->loc.path
+ : uuid_utoa(local->loc.gfid))
+ : "<nul>";
gf_log(this->name, GF_LOG_CRITICAL,
"Indexing gone corrupt at %s (reason: %s)."
" Geo-replication slave content needs to be revalidated",
path, strerror(op_errno));
sys_unlink(priv->timestamp_file);
-
- return 0;
}
int32_t
@@ -567,24 +562,21 @@ marker_specific_setxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
int32_t ret = 0;
- int32_t done = 0;
+ int32_t done = 1;
marker_local_t *local = NULL;
local = (marker_local_t *)frame->local;
if (op_ret == -1 && op_errno == ENOSPC) {
marker_error_handler(this, local, op_errno);
- done = 1;
goto out;
}
if (local) {
if (local->loc.path && strcmp(local->loc.path, "/") == 0) {
- done = 1;
goto out;
}
if (__is_root_gfid(local->loc.gfid)) {
- done = 1;
goto out;
}
}
@@ -595,14 +587,11 @@ marker_specific_setxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
gf_log(this->name, GF_LOG_DEBUG,
"Error occurred "
"while traversing to the parent, stopping marker");
-
- done = 1;
-
goto out;
}
marker_start_setxattr(frame, this);
-
+ done = 0;
out:
if (done) {
marker_setxattr_done(frame);
@@ -3564,3 +3553,16 @@ struct volume_options options[] = {
.flags = OPT_FLAG_NONE,
},
{.key = {NULL}}};
+
+xlator_api_t xlator_api = {
+ .init = init,
+ .fini = fini,
+ .reconfigure = reconfigure,
+ .mem_acct_init = mem_acct_init,
+ .op_version = {1}, /* Present from the initial version */
+ .fops = &fops,
+ .cbks = &cbks,
+ .options = options,
+ .identifier = "marker",
+ .category = GF_MAINTAINED,
+};
diff --git a/xlators/features/marker/src/marker.h b/xlators/features/marker/src/marker.h
index 3b6f4ec8b72..4821094c14b 100644
--- a/xlators/features/marker/src/marker.h
+++ b/xlators/features/marker/src/marker.h
@@ -11,10 +11,10 @@
#define _MARKER_H
#include "marker-quota.h"
-#include "xlator.h"
-#include "defaults.h"
-#include "compat-uuid.h"
-#include "call-stub.h"
+#include <glusterfs/xlator.h>
+#include <glusterfs/defaults.h>
+#include <glusterfs/compat-uuid.h>
+#include <glusterfs/call-stub.h>
#define MARKER_XATTR_PREFIX "trusted.glusterfs"
#define XTIME "xtime"
diff --git a/xlators/experimental/dht2/dht2-server/Makefile.am b/xlators/features/metadisp/Makefile.am
index a985f42a877..a985f42a877 100644
--- a/xlators/experimental/dht2/dht2-server/Makefile.am
+++ b/xlators/features/metadisp/Makefile.am
diff --git a/xlators/features/metadisp/src/Makefile.am b/xlators/features/metadisp/src/Makefile.am
new file mode 100644
index 00000000000..1520ad8c424
--- /dev/null
+++ b/xlators/features/metadisp/src/Makefile.am
@@ -0,0 +1,38 @@
+noinst_PYTHON = gen-fops.py
+
+EXTRA_DIST = fops-tmpl.c
+
+xlator_LTLIBRARIES = metadisp.la
+xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features
+
+nodist_metadisp_la_SOURCES = fops.c
+
+BUILT_SOURCES = fops.c
+
+metadisp_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS)
+
+metadisp_la_SOURCES = metadisp.c \
+ metadisp-unlink.c \
+ metadisp-stat.c \
+ metadisp-lookup.c \
+ metadisp-readdir.c \
+ metadisp-create.c \
+ metadisp-open.c \
+ metadisp-fsync.c \
+ metadisp-setattr.c \
+ backend.c
+
+metadisp_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
+
+noinst_HEADERS = metadisp.h metadisp-fops.h
+
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \
+ -I$(top_srcdir)/rpc/xdr/src -I$(top_builddir)/rpc/xdr/src
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
+
+fops.c: fops-tmpl.c $(top_srcdir)/libglusterfs/src/generator.py gen-fops.py
+ PYTHONPATH=$(top_srcdir)/libglusterfs/src \
+ $(PYTHON) $(srcdir)/gen-fops.py $(srcdir)/fops-tmpl.c > $@
+
+CLEANFILES = $(nodist_metadisp_la_SOURCES)
diff --git a/xlators/features/metadisp/src/backend.c b/xlators/features/metadisp/src/backend.c
new file mode 100644
index 00000000000..ee2c25bfaa7
--- /dev/null
+++ b/xlators/features/metadisp/src/backend.c
@@ -0,0 +1,45 @@
+#define GFID_STR_LEN 37
+
+#include "metadisp.h"
+
+/*
+ * backend.c
+ *
+ * functions responsible for converting user-facing paths to backend-style
+ * "/$GFID" paths.
+ */
+
+/**
+ * build_backend_loc - fill dst_loc with the backend-style "/$GFID" location.
+ *
+ * @gfid:    gfid whose string form (via uuid_utoa_r) becomes the path
+ * @src_loc: loc that is copied into dst_loc before the path is rewritten
+ * @dst_loc: loc to fill. On success its path is a newly allocated
+ *           "/<gfid>" string owned by dst_loc (freed via loc_wipe) and its
+ *           pargfid is the root gfid.
+ *
+ * Returns 0 on success. Returns -1 when src_loc or dst_loc is NULL, when
+ * loc_copy fails, or when the path cannot be allocated; in the allocation
+ * failure case dst_loc is wiped so the caller is not left holding a
+ * half-built loc.
+ */
+int32_t
+build_backend_loc(uuid_t gfid, loc_t *src_loc, loc_t *dst_loc)
+{
+    static uuid_t root = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1};
+    char gfid_buf[GFID_STR_LEN + 1] = {
+        0,
+    };
+    char *path = NULL;
+
+    GF_VALIDATE_OR_GOTO("metadisp", src_loc, out);
+    GF_VALIDATE_OR_GOTO("metadisp", dst_loc, out);
+
+    /* Do not build a path on top of a copy that did not succeed. */
+    if (loc_copy(dst_loc, src_loc) != 0)
+        goto out;
+
+    memcpy(dst_loc->pargfid, root, sizeof(root));
+
+    /* We are overwriting path, so drop whatever loc_copy gave us. Clear the
+     * pointer right away so no exit path can see (or free) it again. */
+    GF_FREE((char *)dst_loc->path);
+    dst_loc->path = NULL;
+
+    uuid_utoa_r(gfid, gfid_buf);
+
+    path = GF_CALLOC(GFID_STR_LEN + 1, sizeof(char),
+                     gf_common_mt_char); // freed via loc_wipe
+    if (path == NULL) {
+        /* Release what loc_copy put into dst_loc; path is already NULL. */
+        loc_wipe(dst_loc);
+        goto out;
+    }
+
+    /* "/" + gfid string; the explicit terminator guards the strncpy. */
+    path[0] = '/';
+    strncpy(path + 1, gfid_buf, GFID_STR_LEN);
+    path[GFID_STR_LEN] = 0;
+    dst_loc->path = path;
+
+    /* name must point into the new path, not the one that was just freed. */
+    if (src_loc->name)
+        dst_loc->name = strrchr(dst_loc->path, '/');
+    if (dst_loc->name)
+        dst_loc->name++;
+    return 0;
+out:
+    return -1;
+}
diff --git a/xlators/features/metadisp/src/fops-tmpl.c b/xlators/features/metadisp/src/fops-tmpl.c
new file mode 100644
index 00000000000..4385b7dd5b7
--- /dev/null
+++ b/xlators/features/metadisp/src/fops-tmpl.c
@@ -0,0 +1,10 @@
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include <glusterfs/xlator.h>
+#include "metadisp.h"
+#include "metadisp-fops.h"
+
+#pragma generate
diff --git a/xlators/features/metadisp/src/gen-fops.py b/xlators/features/metadisp/src/gen-fops.py
new file mode 100644
index 00000000000..8b5e120fdec
--- /dev/null
+++ b/xlators/features/metadisp/src/gen-fops.py
@@ -0,0 +1,160 @@
+#!/usr/bin/python
+
+import sys
+from generator import fop_subs, generate
+
+# Template for a fop that is wound unchanged to METADATA_CHILD(this), using
+# the default_@NAME@_cbk callback. The @...@ markers are placeholders filled
+# in by generate().
+FN_METADATA_CHILD_GENERIC = """
+int32_t
+metadisp_@NAME@ (call_frame_t *frame, xlator_t *this,
+ @LONG_ARGS@)
+{
+ METADISP_TRACE("@NAME@ metadata");
+ STACK_WIND (frame, default_@NAME@_cbk,
+ METADATA_CHILD(this), METADATA_CHILD(this)->fops->@NAME@,
+ @SHORT_ARGS@);
+ return 0;
+}
+"""
+
+# Template for a fop that is wound unchanged to DATA_CHILD(this).
+# NOTE(review): gen_fops() in this file does not reference this template.
+FN_GENERIC_TEMPLATE = """
+int32_t
+metadisp_@NAME@ (call_frame_t *frame, xlator_t *this,
+ @LONG_ARGS@)
+{
+ METADISP_TRACE("@NAME@ generic");
+ STACK_WIND (frame, default_@NAME@_cbk,
+ DATA_CHILD(this), DATA_CHILD(this)->fops->@NAME@,
+ @SHORT_ARGS@);
+ return 0;
+}
+"""
+
+# Template used by gen_fops() for the fops it winds to DATA_CHILD(this); the
+# child is held in a local variable before the wind.
+FN_DATAFD_TEMPLATE = """
+int32_t
+metadisp_@NAME@ (call_frame_t *frame, xlator_t *this,
+ @LONG_ARGS@)
+{
+ METADISP_TRACE("@NAME@ datafd");
+ xlator_t *child = NULL;
+ child = DATA_CHILD(this);
+ STACK_WIND (frame, default_@NAME@_cbk,
+ child, child->fops->@NAME@,
+ @SHORT_ARGS@);
+ return 0;
+}
+"""
+
+# Template used by gen_fops() for loc-based fops sent to DATA_CHILD(this).
+# It calls build_backend_loc() on the incoming loc before winding.
+#
+# NOTE(review): the error path unwinds with the literal fop name "lookup"
+# and four NULLs whatever @NAME@ is -- confirm this matches the callback
+# signature of every fop generated from this template.
+# NOTE(review): backend_loc is filled in but the wind passes @SHORT_ARGS@,
+# which presumably expands to the original argument names (verify against
+# generator.py); backend_loc is also never wiped in this template.
+FN_DATALOC_TEMPLATE = """
+int32_t
+metadisp_@NAME@ (call_frame_t *frame, xlator_t *this,
+ @LONG_ARGS@)
+{
+ METADISP_TRACE("@NAME@ dataloc");
+ loc_t backend_loc = {
+ 0,
+ };
+ if (build_backend_loc(loc->gfid, loc, &backend_loc)) {
+ goto unwind;
+ }
+ xlator_t *child = NULL;
+ child = DATA_CHILD(this);
+ STACK_WIND (frame, default_@NAME@_cbk,
+ child, child->fops->@NAME@,
+ @SHORT_ARGS@);
+ return 0;
+
+unwind:
+ STACK_UNWIND_STRICT(lookup, frame, -1, EINVAL, NULL, NULL, NULL, NULL);
+ return 0;
+}
+"""
+
+# One designated-initializer line of the generated "struct xlator_fops fops"
+# table.
+FOPS_LINE_TEMPLATE = "\t.@NAME@ = metadisp_@NAME@,"
+
+# Fops for which gen_fops() generates no function body. They still get an
+# entry in the generated fops table, so a metadisp_<name> symbol for each of
+# them has to come from somewhere else.
+skipped = [
+ "readdir",
+ "readdirp",
+ "lookup",
+ "fsync",
+ "stat",
+ "open",
+ "create",
+ "unlink",
+ "setattr",
+ # TODO: implement "inodelk",
+]
+
+
+def gen_fops():
+ done = skipped
+
+ #
+ # these are fops that wind to the DATA_CHILD
+ #
+ # NOTE: re-written in order from google doc:
+ # https://docs.google.com/document/d/1KEwVtSNvDhs4qb63gWx2ulCp5GJjge77NGJk4p_Ms4Q
+ for name in [
+ "writev",
+ "readv",
+ "ftruncate",
+ "zerofill",
+ "discard",
+ "seek",
+ "fstat",
+ ]:
+ done = done + [name]
+ print(generate(FN_DATAFD_TEMPLATE, name, fop_subs))
+
+ for name in ["truncate"]:
+ done = done + [name]
+ print(generate(FN_DATALOC_TEMPLATE, name, fop_subs))
+
+ # these are fops that operate solely on dentries, folders,
+ # or extended attributes. Therefore, they must always
+ # wind to METADATA_CHILD and should never perform
+ # any path rewriting
+ #
+ # NOTE: re-written in order from google doc:
+ # https://docs.google.com/document/d/1KEwVtSNvDhs4qb63gWx2ulCp5GJjge77NGJk4p_Ms4Q
+ for name in [
+ "mkdir",
+ "symlink",
+ "link",
+ "rename",
+ "mknod",
+ "opendir",
+ # "readdir, # special-cased
+ # "readdirp, # special-cased
+ "fsyncdir",
+ # "setattr", # special-cased
+ "readlink",
+ "fentrylk",
+ "access",
+ # TODO: these wind to both,
+ # data for backend-attributes and metadata for the rest
+ "xattrop",
+ "setxattr",
+ "getxattr",
+ "removexattr",
+ "fgetxattr",
+ "fsetxattr",
+ "fremovexattr",
+ ]:
+
+ done = done + [name]
+ print(generate(FN_METADATA_CHILD_GENERIC, name, fop_subs))
+
+ print("struct xlator_fops fops = {")
+ for name in done:
+ print(generate(FOPS_LINE_TEMPLATE, name, fop_subs))
+
+ print("};")
+
+
+for l in open(sys.argv[1], "r").readlines():
+ if l.find("#pragma generate") != -1:
+ print("/* BEGIN GENERATED CODE - DO NOT MODIFY */")
+ gen_fops()
+ print("/* END GENERATED CODE */")
+ else:
+ print(l[:-1])
diff --git a/xlators/features/metadisp/src/metadisp-create.c b/xlators/features/metadisp/src/metadisp-create.c
new file mode 100644
index 00000000000..f8c9798dd59
--- /dev/null
+++ b/xlators/features/metadisp/src/metadisp-create.c
@@ -0,0 +1,101 @@
+#include "metadisp.h"
+#include <glusterfs/call-stub.h>
+
+/**
+ * Create, like stat, is a two-step process. We send a create
+ * to the METADATA_CHILD, then send another create to the DATA_CHILD.
+ *
+ * We do the metadata child first to ensure that the ACLs are enforced.
+ */
+
+/**
+ * Callback for the create wound to the DATA_CHILD by
+ * metadisp_create_resume(). All result fields are handed back to the
+ * caller unchanged.
+ */
+int32_t
+metadisp_create_dentry_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+                           int32_t op_ret, int32_t op_errno, fd_t *fd,
+                           inode_t *inode, struct iatt *buf,
+                           struct iatt *preparent, struct iatt *postparent,
+                           dict_t *xdata)
+{
+    STACK_UNWIND_STRICT(create, frame, op_ret, op_errno, fd, inode, buf,
+                        preparent, postparent, xdata);
+
+    return 0;
+}
+
+/**
+ * Second step of create: resumed from the stub once the METADATA_CHILD
+ * create has succeeded. Creates the backend data inode on the DATA_CHILD
+ * using the loc stored in the stub.
+ */
+int32_t
+metadisp_create_resume(call_frame_t *frame, xlator_t *this, loc_t *loc,
+                       int32_t flags, mode_t mode, mode_t umask, fd_t *fd,
+                       dict_t *xdata)
+{
+    STACK_WIND(frame, metadisp_create_dentry_cbk, DATA_CHILD(this),
+               DATA_CHILD(this)->fops->create, loc, flags, mode, umask, fd,
+               xdata);
+
+    return 0;
+}
+
+/**
+ * Callback for the create wound to the METADATA_CHILD.
+ *
+ * @cookie carries the call stub that performs the DATA_CHILD create.
+ * On failure the metadata result is returned to the caller as-is and the
+ * stub is released; on success the stub is resumed so the data-side create
+ * runs next.
+ */
+int32_t
+metadisp_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+                    int32_t op_ret, int32_t op_errno, fd_t *fd, inode_t *inode,
+                    struct iatt *buf, struct iatt *preparent,
+                    struct iatt *postparent, dict_t *xdata)
+{
+    call_stub_t *stub = cookie;
+
+    METADISP_TRACE("%d %d", op_ret, op_errno);
+
+    if (op_ret != 0) {
+        /* The data-side create will never be resumed, so drop the stub
+         * here instead of leaking it. */
+        if (stub) {
+            call_stub_destroy(stub);
+        }
+        STACK_UNWIND_STRICT(create, frame, op_ret, op_errno, fd, inode, buf,
+                            preparent, postparent, xdata);
+        return 0;
+    }
+
+    if (stub == NULL) {
+        /* Nothing to resume: the data inode cannot be created. */
+        STACK_UNWIND_STRICT(create, frame, -1, EINVAL, NULL, NULL, NULL, NULL,
+                            NULL, NULL);
+        return 0;
+    }
+
+    if (stub->poison) {
+        call_stub_destroy(stub);
+        return 0;
+    }
+
+    call_resume(stub);
+    return 0;
+}
+
+/**
+ * create fop entry point.
+ *
+ * Reads the requested gfid from the "gfid-req" key of @xdata, builds the
+ * backend loc from it, and winds the create to the METADATA_CHILD. A stub
+ * holding the backend loc is passed as cookie so metadisp_create_cbk() can
+ * continue with the DATA_CHILD create.
+ *
+ * Every failure path unwinds the frame; the function always returns 0.
+ */
+int32_t
+metadisp_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+                mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata)
+{
+    loc_t backend_loc = {
+        0,
+    };
+    call_stub_t *stub = NULL;
+    uuid_t *gfid_req = NULL;
+    int32_t op_errno = EINVAL;
+
+    METADISP_TRACE(".");
+
+    /* A missing "gfid-req" must still unwind, otherwise the caller never
+     * gets a reply for this frame. */
+    RESOLVE_GFID_REQ(xdata, gfid_req, unwind);
+    if (!gfid_req) {
+        goto unwind;
+    }
+
+    if (build_backend_loc(*gfid_req, loc, &backend_loc)) {
+        goto unwind;
+    }
+
+    stub = fop_create_stub(frame, metadisp_create_resume, &backend_loc, flags,
+                           mode, umask, fd, xdata);
+    if (!stub) {
+        op_errno = ENOMEM;
+        goto unwind;
+    }
+
+    /* NOTE(review): frame->local points at the caller-owned loc and is not
+     * read by any callback in this file; confirm that frame teardown does
+     * not try to release it. */
+    frame->local = loc;
+
+    STACK_WIND_COOKIE(frame, metadisp_create_cbk, stub, METADATA_CHILD(this),
+                      METADATA_CHILD(this)->fops->create, loc, flags, mode,
+                      umask, fd, xdata);
+    return 0;
+
+unwind:
+    STACK_UNWIND_STRICT(create, frame, -1, op_errno, NULL, NULL, NULL, NULL,
+                        NULL, NULL);
+    return 0;
+}
diff --git a/xlators/features/metadisp/src/metadisp-fops.h b/xlators/features/metadisp/src/metadisp-fops.h
new file mode 100644
index 00000000000..56dd427cf34
--- /dev/null
+++ b/xlators/features/metadisp/src/metadisp-fops.h
@@ -0,0 +1,51 @@
+#ifndef GF_METADISP_FOPS_H_
+#define GF_METADISP_FOPS_H_
+
+#include <glusterfs/xlator.h>
+#include <glusterfs/dict.h>
+#include <glusterfs/glusterfs.h>
+
+#include <sys/types.h>
+
+/*
+ * The fops declared here are hand-written, each in its own source file.
+ * Every other fop is defined in fops.c.
+ */
+
+/* Directory listing (metadisp-readdir.c). */
+int
+metadisp_readdir(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+                 off_t off, dict_t *xdata);
+
+int
+metadisp_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+                  off_t off, dict_t *xdata);
+
+/* Two-step fops: metadata child first, then data child. */
+int
+metadisp_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc,
+                dict_t *xdata);
+
+int
+metadisp_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+                mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata);
+
+int
+metadisp_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+              fd_t *fd, dict_t *xdata);
+
+int
+metadisp_stat(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata);
+
+int
+metadisp_inodelk(call_frame_t *frame, xlator_t *this, const char *volume,
+                 loc_t *loc, int32_t cmd, struct gf_flock *lock,
+                 dict_t *xdata);
+
+int
+metadisp_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags,
+               dict_t *xdata);
+
+int
+metadisp_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag,
+                dict_t *xdata);
+
+int
+metadisp_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
+                 struct iatt *stbuf, int32_t valid, dict_t *xdata);
+
+#endif /* GF_METADISP_FOPS_H_ */
diff --git a/xlators/features/metadisp/src/metadisp-fsync.c b/xlators/features/metadisp/src/metadisp-fsync.c
new file mode 100644
index 00000000000..2e46fa84eac
--- /dev/null
+++ b/xlators/features/metadisp/src/metadisp-fsync.c
@@ -0,0 +1,54 @@
+
+#include "metadisp.h"
+#include <glusterfs/call-stub.h>
+
+/**
+ * Second step of fsync: resumed from the stub after the METADATA_CHILD
+ * fsync succeeded. Winds the fsync to the DATA_CHILD; its result goes
+ * straight back to the caller through default_fsync_cbk.
+ */
+int32_t
+metadisp_fsync_resume(call_frame_t *frame, xlator_t *this, fd_t *fd,
+                      int32_t flags, dict_t *xdata)
+{
+    STACK_WIND(frame, default_fsync_cbk, DATA_CHILD(this),
+               DATA_CHILD(this)->fops->fsync, fd, flags, xdata);
+
+    return 0;
+}
+
+/**
+ * Callback for the fsync wound to the METADATA_CHILD.
+ *
+ * @cookie is the stub that continues with the DATA_CHILD fsync. On failure
+ * (or when no stub is available) the stub is released and the frame is
+ * unwound; otherwise the stub is resumed.
+ */
+int32_t
+metadisp_fsync_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+                   int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+                   struct iatt *postbuf, dict_t *xdata)
+{
+    call_stub_t *stub = cookie;
+
+    if (op_ret != 0) {
+        goto unwind;
+    }
+
+    if (!stub) {
+        /* Without a stub the data child can never be synced: report an
+         * error rather than dereferencing NULL below. */
+        op_ret = -1;
+        op_errno = EINVAL;
+        goto unwind;
+    }
+
+    if (stub->poison) {
+        call_stub_destroy(stub);
+        stub = NULL;
+        return 0;
+    }
+
+    call_resume(stub);
+    return 0;
+
+unwind:
+    if (stub) {
+        call_stub_destroy(stub);
+    }
+    STACK_UNWIND_STRICT(fsync, frame, op_ret, op_errno, prebuf, postbuf, xdata);
+    return 0;
+}
+
+/**
+ * fsync fop entry point: sync the METADATA_CHILD first and, via a stub
+ * passed as cookie, the DATA_CHILD afterwards.
+ */
+int32_t
+metadisp_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags,
+               dict_t *xdata)
+{
+    call_stub_t *stub = NULL;
+
+    stub = fop_fsync_stub(frame, metadisp_fsync_resume, fd, flags, xdata);
+    if (!stub) {
+        /* No stub means the data child could never be reached. */
+        STACK_UNWIND_STRICT(fsync, frame, -1, ENOMEM, NULL, NULL, NULL);
+        return 0;
+    }
+
+    STACK_WIND_COOKIE(frame, metadisp_fsync_cbk, stub, METADATA_CHILD(this),
+                      METADATA_CHILD(this)->fops->fsync, fd, flags, xdata);
+    return 0;
+}
diff --git a/xlators/features/metadisp/src/metadisp-lookup.c b/xlators/features/metadisp/src/metadisp-lookup.c
new file mode 100644
index 00000000000..27d90c9f746
--- /dev/null
+++ b/xlators/features/metadisp/src/metadisp-lookup.c
@@ -0,0 +1,90 @@
+#include "metadisp.h"
+#include <glusterfs/call-stub.h>
+
+/**
+ * Lookup, like stat, is a two-step process for grabbing the metadata details
+ * as well as the data details.
+ */
+
+/**
+ * Callback for the lookup wound to the DATA_CHILD. An ENOENT from the
+ * backend is reported to the caller as a failure with ENODATA; every other
+ * result is passed through unchanged.
+ */
+int32_t
+metadisp_backend_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+                            int32_t op_ret, int32_t op_errno, inode_t *inode,
+                            struct iatt *buf, dict_t *xdata,
+                            struct iatt *postparent)
+{
+    METADISP_TRACE("backend_lookup_cbk");
+
+    if (op_errno == ENOENT) {
+        op_ret = -1;
+        op_errno = ENODATA;
+    }
+
+    STACK_UNWIND_STRICT(lookup, frame, op_ret, op_errno, inode, buf, xdata,
+                        postparent);
+    return 0;
+}
+
+/**
+ * Second step of lookup: resumed from the stub after the METADATA_CHILD
+ * lookup. Builds the backend loc from the gfid in @loc and looks it up on
+ * the DATA_CHILD; unwinds with EINVAL when the backend loc cannot be built.
+ */
+int32_t
+metadisp_backend_lookup_resume(call_frame_t *frame, xlator_t *this, loc_t *loc,
+                               dict_t *xdata)
+{
+    METADISP_TRACE("backend_lookup_resume");
+    loc_t data_loc = {
+        0,
+    };
+
+    if (build_backend_loc(loc->gfid, loc, &data_loc) != 0) {
+        STACK_UNWIND_STRICT(lookup, frame, -1, EINVAL, NULL, NULL, NULL, NULL);
+        return 0;
+    }
+
+    STACK_WIND(frame, metadisp_backend_lookup_cbk, DATA_CHILD(this),
+               DATA_CHILD(this)->fops->lookup, &data_loc, xdata);
+    return 0;
+}
+
+/**
+ * Callback for the lookup wound to the METADATA_CHILD.
+ *
+ * Failures and non-regular files are answered directly from the metadata
+ * result. For regular files the stub in @cookie is resumed so that the
+ * DATA_CHILD is looked up as well.
+ */
+int32_t
+metadisp_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+                    int32_t op_ret, int32_t op_errno, inode_t *inode,
+                    struct iatt *buf, dict_t *xdata, struct iatt *postparent)
+{
+    METADISP_TRACE("%d %d", op_ret, op_errno);
+    call_stub_t *stub = NULL;
+    stub = cookie;
+
+    if (op_ret != 0) {
+        goto unwind;
+    }
+
+    if (!IA_ISREG(buf->ia_type)) {
+        goto unwind;
+    } else if (!stub) {
+        /* A regular file needs the backend lookup; without a stub this is
+         * an error, so op_ret must say so too (it is still 0 here). */
+        op_ret = -1;
+        op_errno = EINVAL;
+        goto unwind;
+    }
+
+    METADISP_TRACE("resuming stub");
+
+    call_resume(stub);
+    return 0;
+unwind:
+    METADISP_TRACE("unwinding %d %d", op_ret, op_errno);
+    STACK_UNWIND_STRICT(lookup, frame, op_ret, op_errno, inode, buf, xdata,
+                        postparent);
+    if (stub) {
+        call_stub_destroy(stub);
+    }
+    return 0;
+}
+
+/**
+ * lookup fop entry point: look up on the METADATA_CHILD, passing along a
+ * stub that metadisp_lookup_cbk() resumes for regular files to query the
+ * DATA_CHILD.
+ */
+int32_t
+metadisp_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
+{
+    METADISP_TRACE("lookup");
+    call_stub_t *backend_stub = fop_lookup_stub(
+        frame, metadisp_backend_lookup_resume, loc, xdata);
+
+    STACK_WIND_COOKIE(frame, metadisp_lookup_cbk, backend_stub,
+                      METADATA_CHILD(this), METADATA_CHILD(this)->fops->lookup,
+                      loc, xdata);
+    return 0;
+}
diff --git a/xlators/features/metadisp/src/metadisp-open.c b/xlators/features/metadisp/src/metadisp-open.c
new file mode 100644
index 00000000000..64814afe636
--- /dev/null
+++ b/xlators/features/metadisp/src/metadisp-open.c
@@ -0,0 +1,70 @@
+#include <glusterfs/call-stub.h>
+#include "metadisp.h"
+
+/**
+ * Shared open callback.
+ *
+ * Called with a stub cookie after the METADATA_CHILD open, and with a NULL
+ * cookie after the DATA_CHILD open wound by metadisp_open_resume(). A
+ * successful result with a stub continues on the data child; anything else
+ * releases the stub (if any) and unwinds with the result received.
+ */
+int32_t
+metadisp_open_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+                  int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata)
+{
+    METADISP_TRACE("got open results %d %d", op_ret, op_errno);
+
+    call_stub_t *pending = cookie;
+
+    if (op_ret == 0 && pending != NULL) {
+        if (pending->poison) {
+            call_stub_destroy(pending);
+            return 0;
+        }
+
+        call_resume(pending);
+        return 0;
+    }
+
+    if (pending != NULL) {
+        call_stub_destroy(pending);
+    }
+    STACK_UNWIND_STRICT(open, frame, op_ret, op_errno, fd, xdata);
+    return 0;
+}
+
+/**
+ * Second step of open: resumed from the stub after the METADATA_CHILD open
+ * succeeded. Opens the backend loc on the DATA_CHILD; the NULL cookie makes
+ * metadisp_open_cbk() unwind with that result.
+ */
+int32_t
+metadisp_open_resume(call_frame_t *frame, xlator_t *this, loc_t *loc,
+                     int32_t flags, fd_t *fd, dict_t *xdata)
+{
+    STACK_WIND_COOKIE(frame, metadisp_open_cbk, NULL, DATA_CHILD(this),
+                      DATA_CHILD(this)->fops->open, loc, flags, fd, xdata);
+
+    return 0;
+}
+
+/**
+ * open fop entry point: open on the METADATA_CHILD first, then (through the
+ * stub passed as cookie) on the DATA_CHILD using the backend loc derived
+ * from the gfid in @loc.
+ */
+int32_t
+metadisp_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+              fd_t *fd, dict_t *xdata)
+{
+    call_stub_t *stub = NULL;
+    int32_t op_errno = EINVAL;
+    loc_t backend_loc = {
+        0,
+    };
+
+    if (build_backend_loc(loc->gfid, loc, &backend_loc)) {
+        goto unwind;
+    }
+
+    stub = fop_open_stub(frame, metadisp_open_resume, &backend_loc, flags, fd,
+                         xdata);
+    if (!stub) {
+        /* With a NULL cookie the callback would report success after the
+         * metadata open alone, leaving the data child unopened. */
+        op_errno = ENOMEM;
+        goto unwind;
+    }
+
+    STACK_WIND_COOKIE(frame, metadisp_open_cbk, stub, METADATA_CHILD(this),
+                      METADATA_CHILD(this)->fops->open, loc, flags, fd, xdata);
+    return 0;
+unwind:
+    STACK_UNWIND_STRICT(open, frame, -1, op_errno, NULL, NULL);
+    return 0;
+}
diff --git a/xlators/features/metadisp/src/metadisp-readdir.c b/xlators/features/metadisp/src/metadisp-readdir.c
new file mode 100644
index 00000000000..5f840b1e88f
--- /dev/null
+++ b/xlators/features/metadisp/src/metadisp-readdir.c
@@ -0,0 +1,65 @@
+#include "metadisp.h"
+
+/**
+ * With a change to the posix xlator, readdir and readdirp are shockingly
+ * simple.
+ *
+ * The issue with separating the backend data of the files
+ * with the metadata is that readdirs must now read from multiple sources
+ * to coalesce the directory entries.
+ *
+ * The way we do this is to tell the METADATA_CHILD that when it's
+ * running readdirp, each file entry should have a stat wound to
+ * 'stat-source-of-truth'.
+ *
+ * see metadisp_stat for how it handles winds _from_posix.
+ */
+
+/**
+ * readdir fop entry point. Served as a readdirp on the METADATA_CHILD with
+ * "stat-source-of-truth" set to this xlator in the request dict.
+ */
+int32_t
+metadisp_readdir(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+                 off_t off, dict_t *xdata)
+{
+    METADISP_TRACE(".");
+    /*
+     * Always use readdirp, even if the original was readdir. Why? Because NFS.
+     * There are multiple translations between Gluster, UNIX, and NFS stat
+     * structures in that path. One of them uses the type etc. from the stat
+     * structure, which is only filled in by readdirp. If we use readdir, the
+     * entries do actually go all the way back to the client and are visible in
+     * getdents, but then the readdir throws them away because of the
+     * uninitialized type.
+     */
+
+    /* Hold our own reference (or a fresh dict) for the duration of the
+     * wind, so a dict created here is released again below. */
+    xdata = xdata ? dict_ref(xdata) : dict_new();
+    if (!xdata) {
+        STACK_UNWIND_STRICT(readdir, frame, -1, ENOMEM, NULL, NULL);
+        return 0;
+    }
+
+    // I'm my own source of truth!
+    if (dict_set_static_ptr(xdata, "stat-source-of-truth", (void *)this)) {
+        /* Best effort, as before: the listing is still attempted. */
+        gf_log(this->name, GF_LOG_WARNING,
+               "could not set stat-source-of-truth");
+    }
+
+    STACK_WIND(frame, default_readdirp_cbk, METADATA_CHILD(this),
+               METADATA_CHILD(this)->fops->readdirp, fd, size, off, xdata);
+
+    dict_unref(xdata);
+    return 0;
+}
+
+/**
+ * readdirp fop entry point: winds to the METADATA_CHILD with
+ * "stat-source-of-truth" set to this xlator in the request dict.
+ */
+int32_t
+metadisp_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+                  off_t off, dict_t *xdata)
+{
+    METADISP_TRACE(".");
+
+    /* Hold our own reference (or a fresh dict) for the duration of the
+     * wind, so a dict created here is released again below. */
+    xdata = xdata ? dict_ref(xdata) : dict_new();
+    if (!xdata) {
+        STACK_UNWIND_STRICT(readdirp, frame, -1, ENOMEM, NULL, NULL);
+        return 0;
+    }
+
+    // I'm my own source of truth!
+    if (dict_set_static_ptr(xdata, "stat-source-of-truth", (void *)this)) {
+        /* Best effort, as before: the listing is still attempted. */
+        gf_log(this->name, GF_LOG_WARNING,
+               "could not set stat-source-of-truth");
+    }
+
+    STACK_WIND(frame, default_readdirp_cbk, METADATA_CHILD(this),
+               METADATA_CHILD(this)->fops->readdirp, fd, size, off, xdata);
+
+    dict_unref(xdata);
+    return 0;
+}
diff --git a/xlators/features/metadisp/src/metadisp-setattr.c b/xlators/features/metadisp/src/metadisp-setattr.c
new file mode 100644
index 00000000000..6991cf644f3
--- /dev/null
+++ b/xlators/features/metadisp/src/metadisp-setattr.c
@@ -0,0 +1,90 @@
+#include "metadisp.h"
+#include <glusterfs/call-stub.h>
+
+/**
+ * Callback for the setattr wound to the DATA_CHILD. An ENOENT from the
+ * backend is reported to the caller as a failure with ENODATA; every other
+ * result is passed through unchanged.
+ */
+int32_t
+metadisp_backend_setattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+                             int32_t op_ret, int32_t op_errno,
+                             struct iatt *statpre, struct iatt *statpost,
+                             dict_t *xdata)
+{
+    METADISP_TRACE("backend_setattr_cbk");
+
+    if (op_errno == ENOENT) {
+        op_ret = -1;
+        op_errno = ENODATA;
+    }
+
+    STACK_UNWIND_STRICT(setattr, frame, op_ret, op_errno, statpre, statpost,
+                        xdata);
+    return 0;
+}
+
+/**
+ * Second step of setattr: resumed from the stub after the METADATA_CHILD
+ * setattr. Builds the backend loc from the gfid in @loc and applies the
+ * same attributes on the DATA_CHILD; unwinds with EINVAL when the backend
+ * loc cannot be built.
+ */
+int32_t
+metadisp_backend_setattr_resume(call_frame_t *frame, xlator_t *this, loc_t *loc,
+                                struct iatt *stbuf, int32_t valid,
+                                dict_t *xdata)
+{
+    METADISP_TRACE("backend_setattr_resume");
+    loc_t data_loc = {
+        0,
+    };
+
+    if (build_backend_loc(loc->gfid, loc, &data_loc) != 0) {
+        STACK_UNWIND_STRICT(setattr, frame, -1, EINVAL, NULL, NULL, NULL);
+        return 0;
+    }
+
+    STACK_WIND(frame, metadisp_backend_setattr_cbk, DATA_CHILD(this),
+               DATA_CHILD(this)->fops->setattr, &data_loc, stbuf, valid,
+               xdata);
+    return 0;
+}
+
+/**
+ * Callback for the setattr wound to the METADATA_CHILD.
+ *
+ * Failures and non-regular files are answered directly from the metadata
+ * result. For regular files the stub in @cookie is resumed so that the
+ * DATA_CHILD receives the setattr as well.
+ */
+int32_t
+metadisp_setattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+                     int32_t op_ret, int32_t op_errno, struct iatt *statpre,
+                     struct iatt *statpost, dict_t *xdata)
+{
+    METADISP_TRACE("%d %d", op_ret, op_errno);
+    call_stub_t *stub = NULL;
+    stub = cookie;
+
+    if (op_ret != 0) {
+        goto unwind;
+    }
+
+    if (!IA_ISREG(statpost->ia_type)) {
+        goto unwind;
+    } else if (!stub) {
+        /* A regular file needs the backend setattr; without a stub this is
+         * an error, so op_ret must say so too (it is still 0 here). */
+        op_ret = -1;
+        op_errno = EINVAL;
+        goto unwind;
+    }
+
+    METADISP_TRACE("resuming stub");
+    call_resume(stub);
+    return 0;
+unwind:
+    METADISP_TRACE("unwinding %d %d", op_ret, op_errno);
+    STACK_UNWIND_STRICT(setattr, frame, op_ret, op_errno, statpre, statpost,
+                        xdata);
+    if (stub) {
+        call_stub_destroy(stub);
+    }
+    return 0;
+}
+
+/**
+ * setattr fop entry point: apply on the METADATA_CHILD, passing along a
+ * stub that metadisp_setattr_cbk() resumes for regular files to update the
+ * DATA_CHILD.
+ */
+int32_t
+metadisp_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
+                 struct iatt *stbuf, int32_t valid, dict_t *xdata)
+{
+    METADISP_TRACE("setattr");
+    call_stub_t *backend_stub = fop_setattr_stub(
+        frame, metadisp_backend_setattr_resume, loc, stbuf, valid, xdata);
+
+    STACK_WIND_COOKIE(frame, metadisp_setattr_cbk, backend_stub,
+                      METADATA_CHILD(this),
+                      METADATA_CHILD(this)->fops->setattr, loc, stbuf, valid,
+                      xdata);
+    return 0;
+}
diff --git a/xlators/features/metadisp/src/metadisp-stat.c b/xlators/features/metadisp/src/metadisp-stat.c
new file mode 100644
index 00000000000..b06d0dbcddd
--- /dev/null
+++ b/xlators/features/metadisp/src/metadisp-stat.c
@@ -0,0 +1,124 @@
+#include "metadisp.h"
+#include <glusterfs/call-stub.h>
+
+/**
+ * The stat flow in METADISP is complicated because we must
+ * ensure a few things:
+ * 1. stat, on the path within the metadata layer,
+ * MUST get the backend FD of the data layer.
+ * --- we wind to the metadata layer, then the data layer.
+ *
+ * 2. the metadata layer MUST be able to ask the data
+ * layer for stat information.
+ * --- this is 'syncop-internal-from-posix'
+ *
+ * 3. when the metadata exists BUT the data is missing,
+ * we MUST mark the backend file as bad and heal it.
+ */
+
+/**
+ * Callback for the stat wound to the DATA_CHILD by metadisp_stat_resume().
+ * An ENOENT from the backend is reported as a failure with ENODATA; every
+ * other result is passed through unchanged.
+ */
+int32_t
+metadisp_stat_backend_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+                          int32_t op_ret, int32_t op_errno, struct iatt *buf,
+                          dict_t *xdata)
+{
+    METADISP_TRACE("got backend stat results %d %d", op_ret, op_errno);
+    if (op_errno == ENOENT) {
+        /* This frame belongs to a stat, so unwind it as one. */
+        STACK_UNWIND_STRICT(stat, frame, -1, ENODATA, NULL, NULL);
+        return 0;
+    }
+    STACK_UNWIND_STRICT(stat, frame, op_ret, op_errno, buf, xdata);
+    return 0;
+}
+
+/**
+ * Second step of stat: resumed from the stub after the METADATA_CHILD stat
+ * reported a regular file. Winds the stat for the backend loc to the
+ * DATA_CHILD, or fails with EUCLEAN when the loc carries no gfid.
+ */
+int32_t
+metadisp_stat_resume(call_frame_t *frame, xlator_t *this, loc_t *loc,
+                     dict_t *xdata)
+{
+    METADISP_TRACE("winding stat to path %s", loc->path);
+    if (gf_uuid_is_null(loc->gfid)) {
+        METADISP_TRACE("bad object, sending EUCLEAN");
+        /* This frame belongs to a stat, so unwind it as one. */
+        STACK_UNWIND_STRICT(stat, frame, -1, EUCLEAN, NULL, NULL);
+        return 0;
+    }
+
+    STACK_WIND(frame, metadisp_stat_backend_cbk, DATA_CHILD(this),
+               DATA_CHILD(this)->fops->stat, loc, xdata);
+    return 0;
+}
+
+/**
+ * Callback for the stat wound to the METADATA_CHILD.
+ *
+ * Failures and non-regular files are answered directly from the metadata
+ * result. For regular files the stub in @cookie is resumed so the stat is
+ * repeated on the DATA_CHILD.
+ */
+int32_t
+metadisp_stat_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+                  int32_t op_ret, int32_t op_errno, struct iatt *buf,
+                  dict_t *xdata)
+{
+    call_stub_t *stub = cookie;
+
+    METADISP_TRACE("got stat results %d %d", op_ret, op_errno);
+
+    if (op_ret != 0) {
+        goto unwind;
+    }
+
+    // only use the stub for the files
+    if (!IA_ISREG(buf->ia_type)) {
+        goto unwind;
+    }
+
+    if (!stub) {
+        /* A regular file needs the backend stat; fail instead of
+         * dereferencing a NULL stub below. */
+        op_ret = -1;
+        op_errno = EINVAL;
+        goto unwind;
+    }
+
+    if (stub->poison) {
+        call_stub_destroy(stub);
+        stub = NULL;
+        return 0;
+    }
+
+    call_resume(stub);
+    return 0;
+
+unwind:
+    if (stub) {
+        call_stub_destroy(stub);
+    }
+    STACK_UNWIND_STRICT(stat, frame, op_ret, op_errno, buf, xdata);
+    return 0;
+}
+
+/**
+ * stat fop entry point.
+ *
+ * - A stat of "/" is wound straight to the METADATA_CHILD (see
+ *   METADISP_FILTER_ROOT, which returns from this function).
+ * - A request carrying "syncop-internal-from-posix" is wound directly to
+ *   the DATA_CHILD for the backend loc.
+ * - Anything else is wound to the METADATA_CHILD with a stub as cookie, so
+ *   metadisp_stat_cbk() can continue on the data child for regular files.
+ */
+int32_t
+metadisp_stat(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
+{
+    loc_t backend_loc = {
+        0,
+    };
+    call_stub_t *stub = NULL;
+    int32_t from_posix = 0;
+
+    METADISP_FILTER_ROOT(stat, loc, xdata);
+
+    if (build_backend_loc(loc->gfid, loc, &backend_loc) != 0) {
+        STACK_UNWIND_STRICT(stat, frame, -1, EINVAL, NULL, NULL);
+        return 0;
+    }
+
+    if (dict_get_int32(xdata, "syncop-internal-from-posix", &from_posix) ==
+        0) {
+        // if we've just been sent a stat from posix, then we know
+        // that we must send down a stat for a file to the second child.
+        //
+        // that means we can skip the stat for the first child and just
+        // send to the data disk.
+        METADISP_TRACE("got syncop-internal-from-posix");
+        STACK_WIND(frame, default_stat_cbk, DATA_CHILD(this),
+                   DATA_CHILD(this)->fops->stat, &backend_loc, xdata);
+        return 0;
+    }
+
+    // we do not know if the request is for a file, folder, etc. wind
+    // to first child to find out.
+    stub = fop_stat_stub(frame, metadisp_stat_resume, &backend_loc, xdata);
+    METADISP_TRACE("winding stat to first child %s", loc->path);
+    STACK_WIND_COOKIE(frame, metadisp_stat_cbk, stub, METADATA_CHILD(this),
+                      METADATA_CHILD(this)->fops->stat, loc, xdata);
+    return 0;
+}
diff --git a/xlators/features/metadisp/src/metadisp-unlink.c b/xlators/features/metadisp/src/metadisp-unlink.c
new file mode 100644
index 00000000000..1f6a8eb35ce
--- /dev/null
+++ b/xlators/features/metadisp/src/metadisp-unlink.c
@@ -0,0 +1,160 @@
+
+#include "metadisp.h"
+#include <glusterfs/call-stub.h>
+
+/**
+ * The unlink flow in metadisp is complicated because we must
+ * ensure that UNLINK removes both the metadata objects
+ * and the data objects.
+ */
+
+/**
+ * Last step of unlink: resumed from the stub after the METADATA_CHILD
+ * unlink. Removes the backend object on the DATA_CHILD; the result is
+ * returned to the caller through default_unlink_cbk.
+ */
+int32_t
+metadisp_unlink_resume(call_frame_t *frame, xlator_t *this, loc_t *loc,
+                       int xflag, dict_t *xdata)
+{
+    METADISP_TRACE("winding backend unlink to path %s", loc->path);
+
+    STACK_WIND(frame, default_unlink_cbk, DATA_CHILD(this),
+               DATA_CHILD(this)->fops->unlink, loc, xflag, xdata);
+
+    return 0;
+}
+
+/**
+ * Callback for the unlink wound to the METADATA_CHILD.
+ *
+ * Reads the link count from the GF_RESPONSE_LINK_COUNT_XDATA key of the
+ * response. When the count is greater than 1 the metadata result is
+ * returned and the backend object is left alone; otherwise the stub in
+ * @cookie is resumed to unlink on the DATA_CHILD.
+ */
+int32_t
+metadisp_unlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+                    int32_t op_ret, int32_t op_errno, struct iatt *preparent,
+                    struct iatt *postparent, dict_t *xdata)
+{
+    METADISP_TRACE(". %d %d", op_ret, op_errno);
+
+    int ret = 0;
+    call_stub_t *stub = cookie;
+    /* dict_get_uint32() writes through a uint32_t pointer. */
+    uint32_t nlink = 0;
+
+    if (op_ret != 0) {
+        goto unwind;
+    }
+
+    if (!stub) {
+        /* No stub to continue with; fail instead of dereferencing NULL. */
+        op_errno = EINVAL;
+        op_ret = -1;
+        goto unwind;
+    }
+
+    if (stub->poison) {
+        call_stub_destroy(stub);
+        stub = NULL;
+        return 0;
+    }
+
+    ret = dict_get_uint32(xdata, GF_RESPONSE_LINK_COUNT_XDATA, &nlink);
+    if (ret != 0) {
+        op_errno = EINVAL;
+        op_ret = -1;
+        goto unwind;
+    }
+    METADISP_TRACE("frontend hardlink count %d %d", ret, (int)nlink);
+    if (nlink > 1) {
+        goto unwind;
+    }
+
+    call_resume(stub);
+    return 0;
+
+unwind:
+    if (stub) {
+        call_stub_destroy(stub);
+    }
+    STACK_UNWIND_STRICT(unlink, frame, op_ret, op_errno, preparent, postparent,
+                        xdata);
+    return 0;
+}
+
+/**
+ * Callback for the lookup that metadisp_unlink() winds when the incoming
+ * loc has no gfid. Copies the gfid and parent gfid from the lookup result
+ * into the stub's loc and resumes the stub, which re-enters
+ * metadisp_unlink().
+ */
+int32_t
+metadisp_unlink_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+                           int32_t op_ret, int32_t op_errno, inode_t *inode,
+                           struct iatt *buf, dict_t *xdata,
+                           struct iatt *postparent)
+{
+    call_stub_t *stub = cookie;
+
+    if (op_ret != 0) {
+        goto unwind;
+    }
+
+    if (!stub) {
+        /* Nothing to fill in or resume; fail instead of writing through a
+         * NULL stub below. */
+        op_ret = -1;
+        op_errno = EINVAL;
+        goto unwind;
+    }
+
+    // fail fast on empty gfid so we don't loop forever
+    if (gf_uuid_is_null(buf->ia_gfid)) {
+        op_ret = -1;
+        op_errno = ENODATA;
+        goto unwind;
+    }
+
+    // fill gfid since the stub is incomplete
+    memcpy(stub->args.loc.gfid, buf->ia_gfid, sizeof(uuid_t));
+    memcpy(stub->args.loc.pargfid, postparent->ia_gfid, sizeof(uuid_t));
+
+    if (stub->poison) {
+        call_stub_destroy(stub);
+        stub = NULL;
+        return 0;
+    }
+
+    call_resume(stub);
+    return 0;
+
+unwind:
+    if (stub) {
+        call_stub_destroy(stub);
+    }
+    STACK_UNWIND_STRICT(unlink, frame, op_ret, op_errno, NULL, NULL, NULL);
+    return 0;
+}
+
+/**
+ * unlink fop entry point.
+ *
+ * If @loc has no gfid, a lookup is wound to the METADATA_CHILD first and
+ * this function is re-entered through a stub once the gfid is known.
+ * Otherwise the unlink is wound to the METADATA_CHILD with a request for
+ * the link count, and metadisp_unlink_cbk() decides whether the backend
+ * object on the DATA_CHILD is removed too.
+ */
+int32_t
+metadisp_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag,
+                dict_t *xdata)
+{
+    call_stub_t *stub = NULL;
+    dict_t *req_xdata = NULL; /* our own reference, released before return */
+    int32_t op_errno = EINVAL;
+    loc_t backend_loc = {
+        0,
+    };
+
+    if (gf_uuid_is_null(loc->gfid)) {
+        METADISP_TRACE("winding lookup for unlink to path %s", loc->path);
+
+        // loop back to ourselves after a lookup
+        stub = fop_unlink_stub(frame, metadisp_unlink, loc, xflag, xdata);
+        if (!stub) {
+            op_errno = ENOMEM;
+            goto unwind;
+        }
+        STACK_WIND_COOKIE(frame, metadisp_unlink_lookup_cbk, stub,
+                          METADATA_CHILD(this),
+                          METADATA_CHILD(this)->fops->lookup, loc, xdata);
+        return 0;
+    }
+
+    if (build_backend_loc(loc->gfid, loc, &backend_loc)) {
+        goto unwind;
+    }
+
+    //
+    // ensure we get the link count on the unlink response, so we can
+    // account for hardlinks before winding to the backend.
+    // NOTE:
+    // multiple xlators use GF_REQUEST_LINK_COUNT_XDATA. confirmation
+    // is needed to ensure that multiple requests will work in the same
+    // xlator stack.
+    //
+    req_xdata = xdata ? dict_ref(xdata) : dict_new();
+    if (!req_xdata) {
+        op_errno = ENOMEM;
+        goto unwind;
+    }
+    if (dict_set_int32(req_xdata, GF_REQUEST_LINK_COUNT_XDATA, 1)) {
+        /* Without the request the callback cannot read the link count and
+         * would fail only after the metadata entry is already gone. */
+        op_errno = ENOMEM;
+        goto unwind;
+    }
+
+    METADISP_TRACE("winding frontend unlink to path %s", loc->path);
+    stub = fop_unlink_stub(frame, metadisp_unlink_resume, &backend_loc, xflag,
+                           req_xdata);
+    if (!stub) {
+        op_errno = ENOMEM;
+        goto unwind;
+    }
+
+    STACK_WIND_COOKIE(frame, metadisp_unlink_cbk, stub, METADATA_CHILD(this),
+                      METADATA_CHILD(this)->fops->unlink, loc, xflag,
+                      req_xdata);
+    dict_unref(req_xdata);
+    return 0;
+unwind:
+    if (req_xdata) {
+        dict_unref(req_xdata);
+    }
+    STACK_UNWIND_STRICT(unlink, frame, -1, op_errno, NULL, NULL, NULL);
+    return 0;
+}
diff --git a/xlators/features/metadisp/src/metadisp.c b/xlators/features/metadisp/src/metadisp.c
new file mode 100644
index 00000000000..3c8f150cebc
--- /dev/null
+++ b/xlators/features/metadisp/src/metadisp.c
@@ -0,0 +1,46 @@
+#include <glusterfs/call-stub.h>
+
+#include "metadisp.h"
+#include "metadisp-fops.h"
+
+/**
+ * Translator init: verifies that both children (metadata first, data
+ * second) are configured and warns when the volume has no parent.
+ *
+ * Returns 0 on success, -1 when a child is missing.
+ */
+int32_t
+init(xlator_t *this)
+{
+    if (!this->children) {
+        gf_log(this->name, GF_LOG_ERROR,
+               "not configured with children. exiting");
+        return -1;
+    }
+
+    /* DATA_CHILD() is SECOND_CHILD(): every fop dereferences the second
+     * entry of the child list, so it has to exist. */
+    if (!this->children->next) {
+        gf_log(this->name, GF_LOG_ERROR,
+               "metadisp needs two children (metadata, data). exiting");
+        return -1;
+    }
+
+    if (!this->parents) {
+        gf_log(this->name, GF_LOG_WARNING, "dangling volume. check volfile ");
+    }
+
+    return 0;
+}
+
+/**
+ * Translator teardown. init() keeps no private state, so there is nothing
+ * to release.
+ */
+void
+fini(xlator_t *this)
+{
+}
+
+/* fop table; the initialised definition lives in fops.c */
+struct xlator_fops fops;
+
+/* no callbacks are implemented */
+struct xlator_cbks cbks = {};
+
+/* no volume options; the list holds only its terminator */
+struct volume_options options[] = {
+    {.key = {NULL}},
+};
+
+/* Registration record for the metadisp translator. */
+xlator_api_t xlator_api = {
+    .init = init,
+    .fini = fini,
+    .fops = &fops,
+    .cbks = &cbks,
+    .options = options,
+    .op_version = {1},
+    .identifier = "metadisp",
+    .category = GF_EXPERIMENTAL,
+};
diff --git a/xlators/features/metadisp/src/metadisp.h b/xlators/features/metadisp/src/metadisp.h
new file mode 100644
index 00000000000..c8fd7a13c04
--- /dev/null
+++ b/xlators/features/metadisp/src/metadisp.h
@@ -0,0 +1,45 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#ifndef GF_METADISP_H_
+#define GF_METADISP_H_
+
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/logging.h>
+#include <glusterfs/dict.h>
+#include <glusterfs/xlator.h>
+#include <glusterfs/defaults.h>
+
+/* The first child holds the metadata, the second child holds the data. */
+#define METADATA_CHILD(_this) FIRST_CHILD(_this)
+#define DATA_CHILD(_this) SECOND_CHILD(_this)
+
+/* Fills dst_loc with the backend loc for gfid; callers in this translator
+ * treat a non-zero return as failure. */
+int32_t
+build_backend_loc(uuid_t gfid, loc_t *src_loc, loc_t *dst_loc);
+
+/* printf-style trace helper; logs under "metadisp" at GF_LOG_INFO. */
+#define METADISP_TRACE(_args...) gf_log("metadisp", GF_LOG_INFO, _args)
+
+/*
+ * If loc->path is "/", wind fop _op with _args to the METADATA_CHILD and
+ * return 0 from the enclosing function. Expects `frame`, `this` and `loc`
+ * to be in scope at the point of use.
+ */
+#define METADISP_FILTER_ROOT(_op, _args...)                                    \
+    if (strcmp(loc->path, "/") == 0) {                                         \
+        STACK_WIND(frame, default_##_op##_cbk, METADATA_CHILD(this),           \
+                   METADATA_CHILD(this)->fops->_op, _args);                    \
+        return 0;                                                              \
+    }
+
+/*
+ * Same as METADISP_FILTER_ROOT, but the decision is made with
+ * __is_root_gfid(_gfid). Expects `frame` and `this` to be in scope.
+ */
+#define METADISP_FILTER_ROOT_BY_GFID(_op, _gfid, _args...)                     \
+    if (__is_root_gfid(_gfid)) {                                               \
+        STACK_WIND(frame, default_##_op##_cbk, METADATA_CHILD(this),           \
+                   METADATA_CHILD(this)->fops->_op, _args);                    \
+        return 0;                                                              \
+    }
+
+/*
+ * Fetch the "gfid-req" pointer from _dict into _dest; jump to _lbl when
+ * the key cannot be read.
+ */
+#define RESOLVE_GFID_REQ(_dict, _dest, _lbl)                                   \
+    VALIDATE_OR_GOTO(dict_get_ptr(_dict, "gfid-req", (void **)&_dest) == 0,    \
+                     _lbl)
+
+#endif /* GF_METADISP_H_ */
diff --git a/xlators/features/namespace/src/namespace.c b/xlators/features/namespace/src/namespace.c
index 79812aa330f..86c5ebee900 100644
--- a/xlators/features/namespace/src/namespace.c
+++ b/xlators/features/namespace/src/namespace.c
@@ -15,10 +15,9 @@
#include <sys/types.h>
-#include "defaults.h"
-#include "glusterfs.h"
-#include "hashfn.h"
-#include "logging.h"
+#include <glusterfs/defaults.h>
+#include <glusterfs/hashfn.h>
+#include <glusterfs/logging.h>
#include "namespace.h"
/* Return codes for common path parsing functions. */
@@ -1330,3 +1329,16 @@ struct volume_options options[] = {
},
{.key = {NULL}},
};
+
+/* Registration record for the namespace translator. */
+xlator_api_t xlator_api = {
+    .init = init,
+    .fini = fini,
+    .reconfigure = reconfigure,
+    .op_version = {GD_OP_VERSION_3_12_0},
+    .dumpops = &dumpops,
+    .fops = &fops,
+    .cbks = &cbks,
+    .options = options,
+    .identifier = "namespace",
+    .category = GF_TECH_PREVIEW,
+};
diff --git a/xlators/features/namespace/src/namespace.h b/xlators/features/namespace/src/namespace.h
index 4c04cb3f471..3a9b84d6426 100644
--- a/xlators/features/namespace/src/namespace.h
+++ b/xlators/features/namespace/src/namespace.h
@@ -6,8 +6,8 @@
#include "config.h"
#endif
-#include "xlator.h"
-#include "call-stub.h"
+#include <glusterfs/xlator.h>
+#include <glusterfs/call-stub.h>
#define GF_NAMESPACE "namespace"
diff --git a/xlators/features/quiesce/src/quiesce-mem-types.h b/xlators/features/quiesce/src/quiesce-mem-types.h
index 914bfb22ed0..416456b13af 100644
--- a/xlators/features/quiesce/src/quiesce-mem-types.h
+++ b/xlators/features/quiesce/src/quiesce-mem-types.h
@@ -11,7 +11,7 @@
#ifndef __QUIESCE_MEM_TYPES_H__
#define __QUIESCE_MEM_TYPES_H__
-#include "mem-types.h"
+#include <glusterfs/mem-types.h>
enum gf_quiesce_mem_types_ {
gf_quiesce_mt_priv_t = gf_common_mt_end + 1,
diff --git a/xlators/features/quiesce/src/quiesce-messages.h b/xlators/features/quiesce/src/quiesce-messages.h
index 864a18147dc..32ffd409807 100644
--- a/xlators/features/quiesce/src/quiesce-messages.h
+++ b/xlators/features/quiesce/src/quiesce-messages.h
@@ -11,7 +11,7 @@
#ifndef __QUIESCE_MESSAGES_H__
#define __QUIESCE_MESSAGES_H__
-#include "glfs-message-id.h"
+#include <glusterfs/glfs-message-id.h>
/* To add new message IDs, append new identifiers at the end of the list.
*
diff --git a/xlators/features/quiesce/src/quiesce.c b/xlators/features/quiesce/src/quiesce.c
index 41912b7e882..0e5eb60a16f 100644
--- a/xlators/features/quiesce/src/quiesce.c
+++ b/xlators/features/quiesce/src/quiesce.c
@@ -8,8 +8,8 @@
cases as published by the Free Software Foundation.
*/
#include "quiesce.h"
-#include "defaults.h"
-#include "call-stub.h"
+#include <glusterfs/defaults.h>
+#include <glusterfs/call-stub.h>
/* TODO: */
/* Think about 'writev/_*_lk/setattr/xattrop/' fops to do re-transmittion */
@@ -89,13 +89,14 @@ gf_quiesce_populate_failover_hosts(xlator_t *this, quiesce_priv_t *priv,
if (!dup_val)
goto out;
+ addr_tok = strtok_r(dup_val, ",", &save_ptr);
LOCK(&priv->lock);
{
if (!list_empty(&priv->failover_list))
__gf_quiesce_cleanup_failover_hosts(this, priv);
- addr_tok = strtok_r(dup_val, ",", &save_ptr);
+
while (addr_tok) {
- if (!valid_internet_address(addr_tok, _gf_true)) {
+ if (!valid_internet_address(addr_tok, _gf_true, _gf_false)) {
gf_msg(this->name, GF_LOG_INFO, 0, QUIESCE_MSG_INVAL_HOST,
"Specified "
"invalid internet address:%s",
@@ -1192,6 +1193,33 @@ quiesce_removexattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
}
+/*
+ * fremovexattr fop: wound straight to the first child while the translator
+ * is in pass-through mode; otherwise wrapped in a stub and queued with
+ * gf_quiesce_enqueue(). Unwinds with ENOMEM when the stub cannot be
+ * created.
+ */
 int32_t
+quiesce_fremovexattr(call_frame_t *frame, xlator_t *this, fd_t *fd,
+                     const char *name, dict_t *xdata)
+{
+    quiesce_priv_t *priv = this->private;
+    call_stub_t *stub = NULL;
+
+    if (!priv->pass_through) {
+        stub = fop_fremovexattr_stub(frame, default_fremovexattr_resume, fd,
+                                     name, xdata);
+        if (stub == NULL) {
+            STACK_UNWIND_STRICT(fremovexattr, frame, -1, ENOMEM, NULL);
+            return 0;
+        }
+
+        gf_quiesce_enqueue(this, stub);
+        return 0;
+    }
+
+    STACK_WIND(frame, default_fremovexattr_cbk, FIRST_CHILD(this),
+               FIRST_CHILD(this)->fops->fremovexattr, fd, name, xdata);
+    return 0;
+}
+
+int32_t
quiesce_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset,
dict_t *xdata)
{
@@ -2364,19 +2392,10 @@ quiesce_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t mode,
{
quiesce_priv_t *priv = NULL;
call_stub_t *stub = NULL;
- quiesce_local_t *local = NULL;
priv = this->private;
if (priv && priv->pass_through) {
- local = mem_get0(priv->local_pool);
- local->fd = fd_ref(fd);
- local->offset = offset;
- local->len = len;
- local->flag = mode;
-
- frame->local = local;
-
STACK_WIND(frame, default_fallocate_cbk, FIRST_CHILD(this),
FIRST_CHILD(this)->fops->fallocate, fd, mode, offset, len,
xdata);
@@ -2536,6 +2555,7 @@ fini(xlator_t *this)
this->private = NULL;
mem_pool_destroy(priv->local_pool);
+ priv->local_pool = NULL;
LOCK_DESTROY(&priv->lock);
GF_FREE(priv);
out:
@@ -2592,7 +2612,9 @@ struct xlator_fops fops = {
.truncate = quiesce_truncate,
.ftruncate = quiesce_ftruncate,
.setxattr = quiesce_setxattr,
+ .fsetxattr = quiesce_fsetxattr,
.removexattr = quiesce_removexattr,
+ .fremovexattr = quiesce_fremovexattr,
.symlink = quiesce_symlink,
.unlink = quiesce_unlink,
.link = quiesce_link,
@@ -2625,6 +2647,7 @@ struct xlator_fops fops = {
.access = quiesce_access,
.readlink = quiesce_readlink,
.getxattr = quiesce_getxattr,
+ .fgetxattr = quiesce_fgetxattr,
.open = quiesce_open,
.readv = quiesce_readv,
.flush = quiesce_flush,
@@ -2664,3 +2687,18 @@ struct volume_options options[] = {
"the thin clients can failover to."},
{.key = {NULL}},
};
+
+/* Registration record for the quiesce translator. */
+xlator_api_t xlator_api = {
+    .init = init,
+    .fini = fini,
+    .notify = notify,
+    .reconfigure = reconfigure,
+    .mem_acct_init = mem_acct_init,
+    .op_version = {GD_OP_VERSION_3_12_0},
+    .dumpops = &dumpops,
+    .fops = &fops,
+    .cbks = &cbks,
+    .options = options,
+    .identifier = "quiesce",
+    .category = GF_TECH_PREVIEW,
+};
diff --git a/xlators/features/quiesce/src/quiesce.h b/xlators/features/quiesce/src/quiesce.h
index ed8f8fa2934..6ab2af40a56 100644
--- a/xlators/features/quiesce/src/quiesce.h
+++ b/xlators/features/quiesce/src/quiesce.h
@@ -13,8 +13,8 @@
#include "quiesce-mem-types.h"
#include "quiesce-messages.h"
-#include "xlator.h"
-#include "timer.h"
+#include <glusterfs/xlator.h>
+#include <glusterfs/timer.h>
#define GF_FOPS_EXPECTED_IN_PARALLEL 512
diff --git a/xlators/features/quota/src/Makefile.am b/xlators/features/quota/src/Makefile.am
index 0ae47fc189f..1c2dcef0ca3 100644
--- a/xlators/features/quota/src/Makefile.am
+++ b/xlators/features/quota/src/Makefile.am
@@ -4,7 +4,7 @@ endif
xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features
quota_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS)
-quotad_la_LDFLAGS = -module -export-symbols $(top_srcdir)/xlators/features/quota/src/quotad.sym $(GF_XLATOR_LDFLAGS)
+quotad_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS)
quota_la_SOURCES = quota.c quota-enforcer-client.c
quota_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la \
@@ -27,6 +27,3 @@ AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \
AM_CFLAGS = -Wall $(GF_CFLAGS)
CLEANFILES =
-
-EXTRA_DIST = quotad.sym
-
diff --git a/xlators/features/quota/src/quota-enforcer-client.c b/xlators/features/quota/src/quota-enforcer-client.c
index 1a4c2e30dd6..480d64ade27 100644
--- a/xlators/features/quota/src/quota-enforcer-client.c
+++ b/xlators/features/quota/src/quota-enforcer-client.c
@@ -32,12 +32,6 @@
#include <malloc.h>
#endif
-#ifdef HAVE_MALLOC_STATS
-#ifdef DEBUG
-#include <mcheck.h>
-#endif
-#endif
-
#include "quota.h"
#include "quota-messages.h"
@@ -362,16 +356,28 @@ quota_enforcer_notify(struct rpc_clnt *rpc, void *mydata,
{
xlator_t *this = NULL;
int ret = 0;
+ quota_priv_t *priv = NULL;
this = mydata;
-
+ priv = this->private;
switch (event) {
case RPC_CLNT_CONNECT: {
+ pthread_mutex_lock(&priv->conn_mutex);
+ {
+ priv->conn_status = _gf_true;
+ }
+ pthread_mutex_unlock(&priv->conn_mutex);
gf_msg_trace(this->name, 0, "got RPC_CLNT_CONNECT");
break;
}
case RPC_CLNT_DISCONNECT: {
+ pthread_mutex_lock(&priv->conn_mutex);
+ {
+ priv->conn_status = _gf_false;
+ pthread_cond_signal(&priv->conn_cond);
+ }
+ pthread_mutex_unlock(&priv->conn_mutex);
gf_msg_trace(this->name, 0, "got RPC_CLNT_DISCONNECT");
break;
}
diff --git a/xlators/features/quota/src/quota-mem-types.h b/xlators/features/quota/src/quota-mem-types.h
index e04d2e846cd..782a7de96bb 100644
--- a/xlators/features/quota/src/quota-mem-types.h
+++ b/xlators/features/quota/src/quota-mem-types.h
@@ -10,9 +10,10 @@
#ifndef __QUOTA_MEM_TYPES_H__
#define __QUOTA_MEM_TYPES_H__
-#include "mem-types.h"
+#include <glusterfs/mem-types.h>
enum gf_quota_mem_types_ {
+ /* Those are used by QUOTA_ALLOC_OR_GOTO macro */
gf_quota_mt_quota_priv_t = gf_common_mt_end + 1,
gf_quota_mt_quota_inode_ctx_t,
gf_quota_mt_loc_t,
diff --git a/xlators/features/quota/src/quota-messages.h b/xlators/features/quota/src/quota-messages.h
index 5129d2ee9ad..d434ed75e76 100644
--- a/xlators/features/quota/src/quota-messages.h
+++ b/xlators/features/quota/src/quota-messages.h
@@ -11,7 +11,7 @@
#ifndef _QUOTA_MESSAGES_H_
#define _QUOTA_MESSAGES_H_
-#include "glfs-message-id.h"
+#include <glusterfs/glfs-message-id.h>
/* To add new message IDs, append new identifiers at the end of the list.
*
diff --git a/xlators/features/quota/src/quota.c b/xlators/features/quota/src/quota.c
index 9ad4f902d08..18df9ae6d19 100644
--- a/xlators/features/quota/src/quota.c
+++ b/xlators/features/quota/src/quota.c
@@ -7,15 +7,11 @@
later), or the GNU General Public License, version 2 (GPLv2), in all
cases as published by the Free Software Foundation.
*/
-#include <fnmatch.h>
#include "quota.h"
-#include "common-utils.h"
-#include "defaults.h"
-#include "statedump.h"
-#include "quota-common-utils.h"
+#include <glusterfs/statedump.h>
#include "quota-messages.h"
-#include "events.h"
+#include <glusterfs/events.h>
struct volume_options options[];
@@ -564,15 +560,14 @@ quota_handle_validate_error(call_frame_t *frame, int32_t op_ret,
if (local == NULL)
goto out;
- LOCK(&local->lock);
- {
- if (op_ret < 0) {
+ if (op_ret < 0) {
+ LOCK(&local->lock);
+ {
local->op_ret = op_ret;
local->op_errno = op_errno;
}
+ UNLOCK(&local->lock);
}
- UNLOCK(&local->lock);
-
/* we abort checking limits on this path to root */
quota_link_count_decrement(frame);
out:
@@ -591,9 +586,6 @@ quota_validate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
quota_meta_t size = {
0,
};
- struct timeval tv = {
- 0,
- };
local = frame->local;
@@ -631,13 +623,12 @@ quota_validate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
* loop of validation and checking
* limit when timeout is zero.
*/
- gettimeofday(&tv, NULL);
LOCK(&ctx->lock);
{
ctx->size = size.size;
+ ctx->validate_time = gf_time();
ctx->file_count = size.file_count;
ctx->dir_count = size.dir_count;
- memcpy(&ctx->tv, &tv, sizeof(struct timeval));
}
UNLOCK(&ctx->lock);
@@ -649,64 +640,51 @@ unwind:
return 0;
}
-static uint64_t
-quota_time_elapsed(struct timeval *now, struct timeval *then)
+static inline gf_boolean_t
+quota_timeout(time_t t, uint32_t timeout)
{
- return (now->tv_sec - then->tv_sec);
-}
-
-int32_t
-quota_timeout(struct timeval *tv, int32_t timeout)
-{
- struct timeval now = {
- 0,
- };
- int32_t timed_out = 0;
-
- gettimeofday(&now, NULL);
-
- if (quota_time_elapsed(&now, tv) >= timeout) {
- timed_out = 1;
- }
-
- return timed_out;
+ return (gf_time() - t) >= timeout;
}
/* Return: 1 if new entry added
* 0 no entry added
+ * -1 on errors
*/
static int32_t
quota_add_parent(struct list_head *list, char *name, uuid_t pgfid)
{
quota_dentry_t *entry = NULL;
gf_boolean_t found = _gf_false;
+ int ret = 0;
- if (list == NULL) {
- goto out;
- }
-
- list_for_each_entry(entry, list, next)
- {
- if (gf_uuid_compare(pgfid, entry->par) == 0) {
- found = _gf_true;
- goto out;
+ if (!list_empty(list)) {
+ list_for_each_entry(entry, list, next)
+ {
+ if (gf_uuid_compare(pgfid, entry->par) == 0) {
+ found = _gf_true;
+ goto out;
+ }
}
}
entry = __quota_dentry_new(NULL, name, pgfid);
if (entry)
list_add_tail(&entry->next, list);
+ else
+ ret = -1;
out:
if (found)
return 0;
- else
+ else if (ret == 0)
return 1;
+ else
+ return -1;
}
/* This function iterates the parent list in inode
* context and add unique parent to the list
- * Returns number of dentry added to the list
+ * Returns number of dentry added to the list, or -1 on errors
*/
static int32_t
quota_add_parents_from_ctx(quota_inode_ctx_t *ctx, struct list_head *list)
@@ -723,15 +701,16 @@ quota_add_parents_from_ctx(quota_inode_ctx_t *ctx, struct list_head *list)
list_for_each_entry(dentry, &ctx->parents, next)
{
ret = quota_add_parent(list, dentry->name, dentry->par);
-
if (ret == 1)
count++;
+ else if (ret == -1)
+ break;
}
}
UNLOCK(&ctx->lock);
out:
- return count;
+ return (ret == -1) ? -1 : count;
}
int32_t
@@ -750,10 +729,9 @@ quota_build_ancestry_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
quota_dentry_t *dentry = NULL;
quota_dentry_t *tmp = NULL;
quota_inode_ctx_t *ctx = NULL;
- struct list_head parents = {
- 0,
- };
+ struct list_head parents;
quota_local_t *local = NULL;
+ int ret;
INIT_LIST_HEAD(&parents);
@@ -828,7 +806,11 @@ quota_build_ancestry_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
quota_inode_ctx_get(local->loc.inode, this, &ctx, 0);
- quota_add_parents_from_ctx(ctx, &parents);
+ ret = quota_add_parents_from_ctx(ctx, &parents);
+ if (ret == -1) {
+ op_errno = errno;
+ goto err;
+ }
if (list_empty(&parents)) {
/* we built ancestry for a directory */
@@ -843,7 +825,11 @@ quota_build_ancestry_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
GF_ASSERT (&entry->list != &entries->list);
*/
- quota_add_parent(&parents, entry->d_name, parent->gfid);
+ ret = quota_add_parent(&parents, entry->d_name, parent->gfid);
+ if (ret == -1) {
+ op_errno = errno;
+ goto err;
+ }
}
local->ancestry_cbk(&parents, local->loc.inode, 0, 0, local->ancestry_data);
@@ -861,9 +847,11 @@ cleanup:
parent = NULL;
}
- list_for_each_entry_safe(dentry, tmp, &parents, next)
- {
- __quota_dentry_free(dentry);
+ if (!list_empty(&parents)) {
+ list_for_each_entry_safe(dentry, tmp, &parents, next)
+ {
+ __quota_dentry_free(dentry);
+ }
}
return 0;
@@ -1119,7 +1107,7 @@ quota_check_object_limit(call_frame_t *frame, quota_inode_ctx_t *ctx,
timeout = priv->hard_timeout;
}
- if (!just_validated && quota_timeout(&ctx->tv, timeout)) {
+ if (!just_validated && quota_timeout(ctx->validate_time, timeout)) {
need_validate = 1;
} else if ((object_aggr_count) > ctx->object_hard_lim) {
hard_limit_exceeded = 1;
@@ -1186,7 +1174,7 @@ quota_check_size_limit(call_frame_t *frame, quota_inode_ctx_t *ctx,
timeout = priv->hard_timeout;
}
- if (!just_validated && quota_timeout(&ctx->tv, timeout)) {
+ if (!just_validated && quota_timeout(ctx->validate_time, timeout)) {
need_validate = 1;
} else if (wouldbe_size >= ctx->hard_lim) {
hard_limit_exceeded = 1;
@@ -1755,19 +1743,13 @@ quota_writev_helper(call_frame_t *frame, xlator_t *this, fd_t *fd,
if ((op_errno == EDQUOT) && (local->space_available > 0)) {
new_count = iov_subset(vector, count, 0, local->space_available,
- NULL);
-
- new_vector = GF_CALLOC(new_count, sizeof(struct iovec),
- gf_common_mt_iovec);
- if (new_vector == NULL) {
+ &new_vector, 0);
+ if (new_count < 0) {
local->op_ret = -1;
local->op_errno = ENOMEM;
goto unwind;
}
- new_count = iov_subset(vector, count, 0, local->space_available,
- new_vector);
-
vector = new_vector;
count = new_count;
} else if (op_errno == ENOENT || op_errno == ESTALE) {
@@ -1839,9 +1821,7 @@ quota_writev(call_frame_t *frame, xlator_t *this, fd_t *fd,
quota_inode_ctx_t *ctx = NULL;
quota_dentry_t *dentry = NULL, *tmp = NULL;
call_stub_t *stub = NULL;
- struct list_head head = {
- 0,
- };
+ struct list_head head;
inode_t *par_inode = NULL;
priv = this->private;
@@ -1881,9 +1861,13 @@ quota_writev(call_frame_t *frame, xlator_t *this, fd_t *fd,
priv = this->private;
GF_VALIDATE_OR_GOTO(this->name, priv, unwind);
- size = iov_length(vector, count);
-
parents = quota_add_parents_from_ctx(ctx, &head);
+ if (parents == -1) {
+ op_errno = errno;
+ goto unwind;
+ }
+
+ size = iov_length(vector, count);
LOCK(&local->lock);
{
@@ -1905,10 +1889,12 @@ quota_writev(call_frame_t *frame, xlator_t *this, fd_t *fd,
par_inode = do_quota_check_limit(frame, fd->inode, this, dentry,
_gf_false);
if (par_inode == NULL) {
- /* remove stale entry from inode ctx */
- quota_dentry_del(ctx, dentry->name, dentry->par);
- parents--;
- fail_count++;
+ if (ctx) {
+ /* remove stale entry from inode ctx */
+ quota_dentry_del(ctx, dentry->name, dentry->par);
+ parents--;
+ fail_count++;
+ }
} else {
inode_unref(par_inode);
}
@@ -3276,12 +3262,11 @@ quota_stat_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
goto out;
}
- LOCK(&ctx->lock);
- {
- if (buf)
- ctx->buf = *buf;
+ if (buf) {
+ LOCK(&ctx->lock);
+ ctx->buf = *buf;
+ UNLOCK(&ctx->lock);
}
- UNLOCK(&ctx->lock);
out:
QUOTA_STACK_UNWIND(stat, frame, op_ret, op_errno, buf, xdata);
@@ -3355,12 +3340,11 @@ quota_fstat_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
goto out;
}
- LOCK(&ctx->lock);
- {
- if (buf)
- ctx->buf = *buf;
+ if (buf) {
+ LOCK(&ctx->lock);
+ ctx->buf = *buf;
+ UNLOCK(&ctx->lock);
}
- UNLOCK(&ctx->lock);
out:
QUOTA_STACK_UNWIND(fstat, frame, op_ret, op_errno, buf, xdata);
@@ -3650,12 +3634,11 @@ quota_setattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
goto out;
}
- LOCK(&ctx->lock);
- {
- if (statpost)
- ctx->buf = *statpost;
+ if (statpost) {
+ LOCK(&ctx->lock);
+ ctx->buf = *statpost;
+ UNLOCK(&ctx->lock);
}
- UNLOCK(&ctx->lock);
out:
QUOTA_STACK_UNWIND(setattr, frame, op_ret, op_errno, statpre, statpost,
@@ -4310,9 +4293,6 @@ quota_statfs_validate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
quota_meta_t size = {
0,
};
- struct timeval tv = {
- 0,
- };
local = frame->local;
@@ -4344,13 +4324,12 @@ quota_statfs_validate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
op_errno = EINVAL;
}
- gettimeofday(&tv, NULL);
LOCK(&ctx->lock);
{
ctx->size = size.size;
+ ctx->validate_time = gf_time();
ctx->file_count = size.file_count;
ctx->dir_count = size.dir_count;
- memcpy(&ctx->tv, &tv, sizeof(struct timeval));
}
UNLOCK(&ctx->lock);
@@ -4805,6 +4784,10 @@ quota_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t mode,
GF_VALIDATE_OR_GOTO(this->name, priv, unwind);
parents = quota_add_parents_from_ctx(ctx, &head);
+ if (parents == -1) {
+ op_errno = errno;
+ goto unwind;
+ }
/*
* Note that by using len as the delta we're assuming the range from
@@ -4865,7 +4848,7 @@ off:
void
quota_log_helper(char **usage_str, int64_t cur_size, inode_t *inode,
- char **path, struct timeval *cur_time)
+ char **path, time_t *cur_time)
{
xlator_t *this = THIS;
@@ -4884,7 +4867,7 @@ quota_log_helper(char **usage_str, int64_t cur_size, inode_t *inode,
if (!(*path))
*path = uuid_utoa(inode->gfid);
- gettimeofday(cur_time, NULL);
+ *cur_time = gf_time();
}
/* Logs if
@@ -4895,9 +4878,7 @@ void
quota_log_usage(xlator_t *this, quota_inode_ctx_t *ctx, inode_t *inode,
int64_t delta)
{
- struct timeval cur_time = {
- 0,
- };
+ time_t cur_time = 0;
char *usage_str = NULL;
char *path = NULL;
int64_t cur_size = 0;
@@ -4923,12 +4904,12 @@ quota_log_usage(xlator_t *this, quota_inode_ctx_t *ctx, inode_t *inode,
"path=%s",
usage_str, priv->volume_uuid, path);
- ctx->prev_log = cur_time;
+ ctx->prev_log_time = cur_time;
}
/* Usage is above soft limit */
else if (cur_size > ctx->soft_lim &&
- quota_timeout(&ctx->prev_log, priv->log_timeout)) {
+ quota_timeout(ctx->prev_log_time, priv->log_timeout)) {
quota_log_helper(&usage_str, cur_size, inode, &path, &cur_time);
gf_msg(this->name, GF_LOG_ALERT, 0, Q_MSG_CROSSED_SOFT_LIMIT,
@@ -4939,9 +4920,12 @@ quota_log_usage(xlator_t *this, quota_inode_ctx_t *ctx, inode_t *inode,
"path=%s",
usage_str, priv->volume_uuid, path);
- ctx->prev_log = cur_time;
+ ctx->prev_log_time = cur_time;
}
+ if (path)
+ GF_FREE(path);
+
if (usage_str)
GF_FREE(usage_str);
}
@@ -4997,6 +4981,43 @@ quota_forget(xlator_t *this, inode_t *inode)
return 0;
}
+int
+notify(xlator_t *this, int event, void *data, ...)
+{
+ quota_priv_t *priv = NULL;
+ int ret = 0;
+ rpc_clnt_t *rpc = NULL;
+ gf_boolean_t conn_status = _gf_true;
+ xlator_t *victim = data;
+
+ priv = this->private;
+ if (!priv || !priv->is_quota_on)
+ goto out;
+
+ if (event == GF_EVENT_PARENT_DOWN) {
+ rpc = priv->rpc_clnt;
+ if (rpc) {
+ rpc_clnt_disable(rpc);
+ pthread_mutex_lock(&priv->conn_mutex);
+ {
+ conn_status = priv->conn_status;
+ while (conn_status) {
+ (void)pthread_cond_wait(&priv->conn_cond,
+ &priv->conn_mutex);
+ conn_status = priv->conn_status;
+ }
+ }
+ pthread_mutex_unlock(&priv->conn_mutex);
+ gf_log(this->name, GF_LOG_INFO,
+ "Notify GF_EVENT_PARENT_DOWN for brick %s", victim->name);
+ }
+ }
+
+out:
+ ret = default_notify(this, event, data);
+ return ret;
+}
+
int32_t
init(xlator_t *this)
{
@@ -5039,6 +5060,10 @@ init(xlator_t *this)
goto err;
}
+ pthread_mutex_init(&priv->conn_mutex, NULL);
+ pthread_cond_init(&priv->conn_cond, NULL);
+ priv->conn_status = _gf_false;
+
if (priv->is_quota_on) {
rpc = quota_enforcer_init(this, this->options);
if (rpc == NULL) {
@@ -5132,13 +5157,14 @@ quota_priv_dump(xlator_t *this)
if (ret)
goto out;
else {
- gf_proc_dump_write("soft-timeout", "%d", priv->soft_timeout);
- gf_proc_dump_write("hard-timeout", "%d", priv->hard_timeout);
- gf_proc_dump_write("alert-time", "%d", priv->log_timeout);
+ gf_proc_dump_write("soft-timeout", "%u", priv->soft_timeout);
+ gf_proc_dump_write("hard-timeout", "%u", priv->hard_timeout);
+ gf_proc_dump_write("alert-time", "%u", priv->log_timeout);
gf_proc_dump_write("quota-on", "%d", priv->is_quota_on);
gf_proc_dump_write("statfs", "%d", priv->consider_statfs);
gf_proc_dump_write("volume-uuid", "%s", priv->volume_uuid);
- gf_proc_dump_write("validation-count", "%ld", priv->validation_count);
+ gf_proc_dump_write("validation-count", "%" PRIu64,
+ priv->validation_count);
}
UNLOCK(&priv->lock);
@@ -5151,20 +5177,22 @@ fini(xlator_t *this)
{
quota_priv_t *priv = NULL;
rpc_clnt_t *rpc = NULL;
- int i = 0, cnt = 0;
priv = this->private;
if (!priv)
return;
rpc = priv->rpc_clnt;
priv->rpc_clnt = NULL;
- this->private = NULL;
if (rpc) {
- cnt = GF_ATOMIC_GET(rpc->refcount);
- for (i = 0; i < cnt; i++)
- rpc_clnt_unref(rpc);
+ rpc_clnt_connection_cleanup(&rpc->conn);
+ rpc_clnt_unref(rpc);
}
+
+ this->private = NULL;
LOCK_DESTROY(&priv->lock);
+ pthread_mutex_destroy(&priv->conn_mutex);
+ pthread_cond_destroy(&priv->conn_cond);
+
GF_FREE(priv);
if (this->local_pool) {
mem_pool_destroy(this->local_pool);
@@ -5292,3 +5320,17 @@ struct volume_options options[] = {
.tags = {},
},
{.key = {NULL}}};
+
+xlator_api_t xlator_api = {
+ .init = init,
+ .fini = fini,
+ .notify = notify,
+ .reconfigure = reconfigure,
+ .mem_acct_init = mem_acct_init,
+ .op_version = {1}, /* Present from the initial version */
+ .fops = &fops,
+ .cbks = &cbks,
+ .options = options,
+ .identifier = "quota",
+ .category = GF_MAINTAINED,
+};
diff --git a/xlators/features/quota/src/quota.h b/xlators/features/quota/src/quota.h
index 8bcc3ec6176..0395d78c9ef 100644
--- a/xlators/features/quota/src/quota.h
+++ b/xlators/features/quota/src/quota.h
@@ -10,25 +10,22 @@
#ifndef _QUOTA_H
#define _QUOTA_H
-#include "xlator.h"
-#include "call-stub.h"
-#include "defaults.h"
-#include "common-utils.h"
+#include <glusterfs/call-stub.h>
#include "quota-mem-types.h"
-#include "glusterfs.h"
-#include "compat.h"
-#include "logging.h"
-#include "dict.h"
-#include "gf-event.h"
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/compat.h>
+#include <glusterfs/logging.h>
+#include <glusterfs/dict.h>
+#include <glusterfs/gf-event.h>
#include "rpcsvc.h"
#include "rpc-clnt.h"
-#include "byte-order.h"
+#include <glusterfs/byte-order.h>
#include "glusterfs3-xdr.h"
#include "glusterfs3.h"
#include "xdr-generic.h"
-#include "compat-errno.h"
+#include <glusterfs/compat-errno.h>
#include "protocol-common.h"
-#include "quota-common-utils.h"
+#include <glusterfs/quota-common-utils.h>
#include "quota-messages.h"
#define DIRTY "dirty"
@@ -156,8 +153,8 @@ struct quota_inode_ctx {
int64_t object_soft_lim;
struct iatt buf;
struct list_head parents;
- struct timeval tv;
- struct timeval prev_log;
+ time_t validate_time;
+ time_t prev_log_time;
gf_boolean_t ancestry_built;
gf_lock_t lock;
};
@@ -202,6 +199,7 @@ struct quota_local {
typedef struct quota_local quota_local_t;
struct quota_priv {
+ /* FIXME: consider time_t for timeouts. */
uint32_t soft_timeout;
uint32_t hard_timeout;
uint32_t log_timeout;
@@ -217,6 +215,9 @@ struct quota_priv {
char *volume_uuid;
uint64_t validation_count;
int32_t quotad_conn_status;
+ pthread_mutex_t conn_mutex;
+ pthread_cond_t conn_cond;
+ gf_boolean_t conn_status;
};
typedef struct quota_priv quota_priv_t;
diff --git a/xlators/features/quota/src/quotad-aggregator.c b/xlators/features/quota/src/quotad-aggregator.c
index e0129e4f63a..75d47867b5b 100644
--- a/xlators/features/quota/src/quotad-aggregator.c
+++ b/xlators/features/quota/src/quotad-aggregator.c
@@ -13,7 +13,14 @@
#include "quotad-helpers.h"
#include "quotad-aggregator.h"
-struct rpcsvc_program quotad_aggregator_prog;
+static char *qd_ext_xattrs[] = {
+ QUOTA_SIZE_KEY,
+ QUOTA_LIMIT_KEY,
+ QUOTA_LIMIT_OBJECTS_KEY,
+ NULL,
+};
+
+static struct rpcsvc_program quotad_aggregator_prog;
struct iobuf *
quotad_serialize_reply(rpcsvc_request_t *req, void *arg, struct iovec *outmsg,
@@ -132,13 +139,16 @@ quotad_aggregator_getlimit_cbk(xlator_t *this, call_frame_t *frame,
int ret = -1;
int type = 0;
+ if (!rsp || (rsp->op_ret == -1))
+ goto reply;
+
GF_PROTOCOL_DICT_UNSERIALIZE(frame->this, xdata, (rsp->xdata.xdata_val),
(rsp->xdata.xdata_len), rsp->op_ret,
rsp->op_errno, out);
if (xdata) {
state = frame->root->state;
- ret = dict_get_int32n(state->xdata, "type", SLEN("type"), &type);
+ ret = dict_get_int32n(state->req_xdata, "type", SLEN("type"), &type);
if (ret < 0)
goto out;
@@ -166,8 +176,9 @@ out:
}
reply:
- quotad_aggregator_submit_reply(frame, frame->local, (void *)&cli_rsp, NULL,
- 0, NULL, (xdrproc_t)xdr_gf_cli_rsp);
+ quotad_aggregator_submit_reply(frame, (frame) ? frame->local : NULL,
+ (void *)&cli_rsp, NULL, 0, NULL,
+ (xdrproc_t)xdr_gf_cli_rsp);
dict_unref(xdata);
GF_FREE(cli_rsp.dict.dict_val);
@@ -182,22 +193,20 @@ quotad_aggregator_getlimit(rpcsvc_request_t *req)
{0},
};
gf_cli_rsp cli_rsp = {0};
- gfs3_lookup_req args = {
- {
- 0,
- },
- };
quotad_aggregator_state_t *state = NULL;
xlator_t *this = NULL;
dict_t *dict = NULL;
int ret = -1, op_errno = 0;
char *gfid_str = NULL;
uuid_t gfid = {0};
+ char *volume_uuid = NULL;
GF_VALIDATE_OR_GOTO("quotad-aggregator", req, err);
this = THIS;
+ cli_req.dict.dict_val = alloca(req->msg[0].iov_len);
+
ret = xdr_to_generic(req->msg[0], &cli_req, (xdrproc_t)xdr_gf_cli_req);
if (ret < 0) {
// failed to decode msg;
@@ -224,6 +233,11 @@ quotad_aggregator_getlimit(rpcsvc_request_t *req)
goto err;
}
+ ret = dict_get_strn(dict, "volume-uuid", SLEN("volume-uuid"), &volume_uuid);
+ if (ret) {
+ goto err;
+ }
+
gf_uuid_parse((const char *)gfid_str, gfid);
frame = quotad_aggregator_get_frame_from_req(req);
@@ -232,7 +246,9 @@ quotad_aggregator_getlimit(rpcsvc_request_t *req)
goto errx;
}
state = frame->root->state;
- state->xdata = dict;
+ state->req_xdata = dict;
+ state->xdata = dict_new();
+ dict = NULL;
ret = dict_set_int32_sizen(state->xdata, QUOTA_LIMIT_KEY, 42);
if (ret)
@@ -253,13 +269,8 @@ quotad_aggregator_getlimit(rpcsvc_request_t *req)
if (ret)
goto err;
- memcpy(&args.gfid, &gfid, 16);
-
- args.bname = alloca(req->msg[0].iov_len);
- args.xdata.xdata_val = alloca(req->msg[0].iov_len);
-
- ret = qd_nameless_lookup(this, frame, &args, state->xdata,
- quotad_aggregator_getlimit_cbk);
+ ret = qd_nameless_lookup(this, frame, (char *)gfid, state->xdata,
+ volume_uuid, quotad_aggregator_getlimit_cbk);
if (ret) {
cli_rsp.op_errno = ret;
goto errx;
@@ -276,7 +287,6 @@ errx:
quotad_aggregator_getlimit_cbk(this, frame, &cli_rsp);
if (dict)
dict_unref(dict);
-
return ret;
}
@@ -299,12 +309,14 @@ quotad_aggregator_lookup(rpcsvc_request_t *req)
0,
},
};
- int ret = -1, op_errno = 0;
+ int i = 0, ret = -1, op_errno = 0;
gfs3_lookup_rsp rsp = {
0,
};
quotad_aggregator_state_t *state = NULL;
xlator_t *this = NULL;
+ dict_t *dict = NULL;
+ char *volume_uuid = NULL;
GF_VALIDATE_OR_GOTO("quotad-aggregator", req, err);
@@ -327,16 +339,34 @@ quotad_aggregator_lookup(rpcsvc_request_t *req)
state = frame->root->state;
- GF_PROTOCOL_DICT_UNSERIALIZE(this, state->xdata, (args.xdata.xdata_val),
+ GF_PROTOCOL_DICT_UNSERIALIZE(this, dict, (args.xdata.xdata_val),
(args.xdata.xdata_len), ret, op_errno, err);
- ret = qd_nameless_lookup(this, frame, &args, state->xdata,
+ ret = dict_get_str(dict, "volume-uuid", &volume_uuid);
+ if (ret) {
+ goto err;
+ }
+
+ state->xdata = dict_new();
+
+ for (i = 0; qd_ext_xattrs[i]; i++) {
+ if (dict_get(dict, qd_ext_xattrs[i])) {
+ ret = dict_set_uint32(state->xdata, qd_ext_xattrs[i], 1);
+ if (ret < 0)
+ goto err;
+ }
+ }
+
+ ret = qd_nameless_lookup(this, frame, args.gfid, state->xdata, volume_uuid,
quotad_aggregator_lookup_cbk);
if (ret) {
rsp.op_errno = ret;
goto err;
}
+ if (dict)
+ dict_unref(dict);
+
return ret;
err:
@@ -344,6 +374,9 @@ err:
rsp.op_errno = op_errno;
quotad_aggregator_lookup_cbk(this, frame, &rsp);
+ if (dict)
+ dict_unref(dict);
+
return ret;
}
@@ -445,15 +478,15 @@ out:
return ret;
}
-rpcsvc_actor_t quotad_aggregator_actors[GF_AGGREGATOR_MAXVALUE] = {
- [GF_AGGREGATOR_NULL] = {"NULL", GF_AGGREGATOR_NULL, NULL, NULL, 0, DRC_NA},
- [GF_AGGREGATOR_LOOKUP] = {"LOOKUP", GF_AGGREGATOR_NULL,
- quotad_aggregator_lookup, NULL, 0, DRC_NA},
- [GF_AGGREGATOR_GETLIMIT] = {"GETLIMIT", GF_AGGREGATOR_GETLIMIT,
- quotad_aggregator_getlimit, NULL, 0, DRC_NA},
+static rpcsvc_actor_t quotad_aggregator_actors[GF_AGGREGATOR_MAXVALUE] = {
+ [GF_AGGREGATOR_NULL] = {"NULL", NULL, NULL, GF_AGGREGATOR_NULL, DRC_NA, 0},
+ [GF_AGGREGATOR_LOOKUP] = {"LOOKUP", quotad_aggregator_lookup, NULL,
+ GF_AGGREGATOR_NULL, DRC_NA, 0},
+ [GF_AGGREGATOR_GETLIMIT] = {"GETLIMIT", quotad_aggregator_getlimit, NULL,
+ GF_AGGREGATOR_GETLIMIT, DRC_NA, 0},
};
-struct rpcsvc_program quotad_aggregator_prog = {
+static struct rpcsvc_program quotad_aggregator_prog = {
.progname = "GlusterFS 3.3",
.prognum = GLUSTER_AGGREGATOR_PROGRAM,
.progver = GLUSTER_AGGREGATOR_VERSION,
diff --git a/xlators/features/quota/src/quotad-aggregator.h b/xlators/features/quota/src/quotad-aggregator.h
index 02a0094102f..706592c7d50 100644
--- a/xlators/features/quota/src/quotad-aggregator.h
+++ b/xlators/features/quota/src/quotad-aggregator.h
@@ -12,9 +12,9 @@
#define _QUOTAD_AGGREGATOR_H
#include "quota.h"
-#include "stack.h"
+#include <glusterfs/stack.h>
#include "glusterfs3-xdr.h"
-#include "inode.h"
+#include <glusterfs/inode.h>
typedef struct {
void *pool;
@@ -23,13 +23,15 @@ typedef struct {
inode_table_t *itable;
loc_t loc;
dict_t *xdata;
+ dict_t *req_xdata;
} quotad_aggregator_state_t;
typedef int (*quotad_aggregator_lookup_cbk_t)(xlator_t *this,
call_frame_t *frame, void *rsp);
int
-qd_nameless_lookup(xlator_t *this, call_frame_t *frame, gfs3_lookup_req *req,
- dict_t *xdata, quotad_aggregator_lookup_cbk_t lookup_cbk);
+qd_nameless_lookup(xlator_t *this, call_frame_t *frame, char *gfid,
+ dict_t *xdata, char *volume_uuid,
+ quotad_aggregator_lookup_cbk_t lookup_cbk);
int
quotad_aggregator_init(xlator_t *this);
diff --git a/xlators/features/quota/src/quotad-helpers.c b/xlators/features/quota/src/quotad-helpers.c
index be8f9080f14..51ff1d7e98d 100644
--- a/xlators/features/quota/src/quotad-helpers.c
+++ b/xlators/features/quota/src/quotad-helpers.c
@@ -47,6 +47,9 @@ quotad_aggregator_free_state(quotad_aggregator_state_t *state)
if (state->xdata)
dict_unref(state->xdata);
+ if (state->req_xdata)
+ dict_unref(state->req_xdata);
+
GF_FREE(state);
}
@@ -73,7 +76,6 @@ quotad_aggregator_alloc_frame(rpcsvc_request_t *req)
goto out;
frame->root->state = state;
- frame->root->unique = 0;
frame->this = this;
out:
@@ -93,8 +95,6 @@ quotad_aggregator_get_frame_from_req(rpcsvc_request_t *req)
frame->root->op = req->procnum;
- frame->root->unique = req->xid;
-
frame->root->uid = req->uid;
frame->root->gid = req->gid;
frame->root->pid = req->pid;
diff --git a/xlators/features/quota/src/quotad.c b/xlators/features/quota/src/quotad.c
index 5b0ab83673b..643f25c9c2a 100644
--- a/xlators/features/quota/src/quotad.c
+++ b/xlators/features/quota/src/quotad.c
@@ -9,7 +9,6 @@
*/
#include "quota.h"
#include "quotad-aggregator.h"
-#include "common-utils.h"
int
qd_notify(xlator_t *this, int32_t event, void *data, ...)
@@ -104,8 +103,9 @@ out:
}
int
-qd_nameless_lookup(xlator_t *this, call_frame_t *frame, gfs3_lookup_req *req,
- dict_t *xdata, quotad_aggregator_lookup_cbk_t lookup_cbk)
+qd_nameless_lookup(xlator_t *this, call_frame_t *frame, char *gfid,
+ dict_t *xdata, char *volume_uuid,
+ quotad_aggregator_lookup_cbk_t lookup_cbk)
{
gfs3_lookup_rsp rsp = {
0,
@@ -116,7 +116,6 @@ qd_nameless_lookup(xlator_t *this, call_frame_t *frame, gfs3_lookup_req *req,
};
quotad_aggregator_state_t *state = NULL;
xlator_t *subvol = NULL;
- char *volume_uuid = NULL;
state = frame->root->state;
@@ -128,14 +127,7 @@ qd_nameless_lookup(xlator_t *this, call_frame_t *frame, gfs3_lookup_req *req,
goto out;
}
- memcpy(loc.gfid, req->gfid, 16);
-
- ret = dict_get_strn(xdata, "volume-uuid", SLEN("volume-uuid"),
- &volume_uuid);
- if (ret < 0) {
- op_errno = EINVAL;
- goto out;
- }
+ memcpy(loc.gfid, gfid, 16);
ret = dict_set_int8(xdata, QUOTA_READ_ONLY_KEY, 1);
if (ret < 0) {
@@ -220,11 +212,6 @@ err:
return ret;
}
-class_methods_t class_methods = {.init = qd_init,
- .fini = qd_fini,
- .reconfigure = qd_reconfigure,
- .notify = qd_notify};
-
struct xlator_fops fops = {};
struct xlator_cbks cbks = {};
@@ -240,4 +227,19 @@ struct volume_options options[] = {
.key = {"transport.*"},
.type = GF_OPTION_TYPE_ANY,
},
- {.key = {NULL}}};
+ {.key = {NULL}},
+};
+
+xlator_api_t xlator_api = {
+ .init = qd_init,
+ .fini = qd_fini,
+ .reconfigure = qd_reconfigure,
+ .notify = qd_notify,
+ .mem_acct_init = mem_acct_init,
+ .op_version = {1},
+ .fops = &fops,
+ .cbks = &cbks,
+ .options = options,
+ .identifier = "quotad",
+ .category = GF_MAINTAINED,
+};
diff --git a/xlators/features/quota/src/quotad.sym b/xlators/features/quota/src/quotad.sym
deleted file mode 100644
index 0829ffe1584..00000000000
--- a/xlators/features/quota/src/quotad.sym
+++ /dev/null
@@ -1,7 +0,0 @@
-fops
-cbks
-class_methods
-options
-mem_acct_init
-reconfigure
-dumpops
diff --git a/xlators/features/read-only/src/read-only-common.c b/xlators/features/read-only/src/read-only-common.c
index 39985169991..9640e7e3eee 100644
--- a/xlators/features/read-only/src/read-only-common.c
+++ b/xlators/features/read-only/src/read-only-common.c
@@ -9,7 +9,7 @@
*/
#include "read-only.h"
#include "read-only-mem-types.h"
-#include "defaults.h"
+#include <glusterfs/defaults.h>
gf_boolean_t
is_readonly_or_worm_enabled(call_frame_t *frame, xlator_t *this)
diff --git a/xlators/features/read-only/src/read-only-common.h b/xlators/features/read-only/src/read-only-common.h
index 32719da28f1..5561961ffa2 100644
--- a/xlators/features/read-only/src/read-only-common.h
+++ b/xlators/features/read-only/src/read-only-common.h
@@ -7,8 +7,8 @@
later), or the GNU General Public License, version 2 (GPLv2), in all
cases as published by the Free Software Foundation.
*/
-#include "xlator.h"
-#include "defaults.h"
+#include <glusterfs/xlator.h>
+#include <glusterfs/defaults.h>
gf_boolean_t
is_readonly_or_worm_enabled(call_frame_t *frame, xlator_t *this);
diff --git a/xlators/features/read-only/src/read-only-mem-types.h b/xlators/features/read-only/src/read-only-mem-types.h
index 4baaeb41216..c67d6c02cd0 100644
--- a/xlators/features/read-only/src/read-only-mem-types.h
+++ b/xlators/features/read-only/src/read-only-mem-types.h
@@ -11,7 +11,7 @@
#ifndef __READONLY_MEM_TYPES_H__
#define __READONLY_MEM_TYPES_H__
-#include "mem-types.h"
+#include <glusterfs/mem-types.h>
enum gf_read_only_mem_types_ {
gf_read_only_mt_priv_t = gf_common_mt_end + 1,
diff --git a/xlators/features/read-only/src/read-only.c b/xlators/features/read-only/src/read-only.c
index c92a9801196..48654998e63 100644
--- a/xlators/features/read-only/src/read-only.c
+++ b/xlators/features/read-only/src/read-only.c
@@ -7,7 +7,6 @@
later), or the GNU General Public License, version 2 (GPLv2), in all
cases as published by the Free Software Foundation.
*/
-#include "defaults.h"
#include "read-only-common.h"
#include "read-only-mem-types.h"
#include "read-only.h"
@@ -130,3 +129,16 @@ struct volume_options options[] = {
"\"off\" by default."},
{.key = {NULL}},
};
+
+xlator_api_t xlator_api = {
+ .init = init,
+ .fini = fini,
+ .reconfigure = reconfigure,
+ .mem_acct_init = mem_acct_init,
+ .op_version = {1}, /* Present from the initial version */
+ .fops = &fops,
+ .cbks = &cbks,
+ .options = options,
+ .identifier = "read-only",
+ .category = GF_TECH_PREVIEW,
+};
diff --git a/xlators/features/read-only/src/read-only.h b/xlators/features/read-only/src/read-only.h
index d74053a2a8f..aced5d3c577 100644
--- a/xlators/features/read-only/src/read-only.h
+++ b/xlators/features/read-only/src/read-only.h
@@ -11,25 +11,26 @@
#ifndef __READONLY_H__
#define __READONLY_H__
-#include "read-only-mem-types.h"
-#include "xlator.h"
+#include <stdint.h> // for uint64_t, uint8_t
+#include <sys/time.h> // for time_t
+#include "glusterfs/glusterfs.h" // for gf_boolean_t
typedef struct {
uint8_t worm : 1;
uint8_t retain : 1;
uint8_t legal_hold : 1;
uint8_t ret_mode : 1;
- uint64_t ret_period;
- uint64_t auto_commit_period;
+ int64_t ret_period;
+ int64_t auto_commit_period;
} worm_reten_state_t;
typedef struct {
gf_boolean_t readonly_or_worm_enabled;
gf_boolean_t worm_file;
gf_boolean_t worm_files_deletable;
- uint64_t reten_period;
- uint64_t com_period;
- char *reten_mode;
+ int64_t reten_period;
+ int64_t com_period;
+ int reten_mode;
time_t start_time;
} read_only_priv_t;
diff --git a/xlators/features/read-only/src/worm-helper.c b/xlators/features/read-only/src/worm-helper.c
index 3f882fe08d6..df45f2a940b 100644
--- a/xlators/features/read-only/src/worm-helper.c
+++ b/xlators/features/read-only/src/worm-helper.c
@@ -9,8 +9,8 @@
*/
#include "read-only-mem-types.h"
#include "read-only.h"
-#include "xlator.h"
-#include "syncop.h"
+#include <glusterfs/xlator.h>
+#include <glusterfs/syncop.h>
#include "worm-helper.h"
/*Function to check whether file is read-only.
@@ -41,7 +41,7 @@ worm_init_state(xlator_t *this, gf_boolean_t fop_with_fd, void *file_ptr)
GF_VALIDATE_OR_GOTO("worm", this, out);
GF_VALIDATE_OR_GOTO(this->name, file_ptr, out);
- start_time = time(NULL);
+ start_time = gf_time();
dict = dict_new();
if (!dict) {
gf_log(this->name, GF_LOG_ERROR, "Error creating the dict");
@@ -84,10 +84,7 @@ worm_set_state(xlator_t *this, gf_boolean_t fop_with_fd, void *file_ptr,
retention_state->worm = 1;
retention_state->retain = 1;
retention_state->legal_hold = 0;
- if (strcmp(priv->reten_mode, "relax") == 0)
- retention_state->ret_mode = 0;
- else
- retention_state->ret_mode = 1;
+ retention_state->ret_mode = priv->reten_mode;
retention_state->ret_period = priv->reten_period;
retention_state->auto_commit_period = priv->com_period;
if (fop_with_fd)
@@ -97,7 +94,7 @@ worm_set_state(xlator_t *this, gf_boolean_t fop_with_fd, void *file_ptr,
if (ret)
goto out;
stbuf->ia_mtime = stpre.ia_mtime;
- stbuf->ia_atime = time(NULL) + retention_state->ret_period;
+ stbuf->ia_atime = gf_time() + retention_state->ret_period;
if (fop_with_fd)
ret = syncop_fsetattr(this, (fd_t *)file_ptr, stbuf, GF_SET_ATTR_ATIME,
@@ -289,6 +286,7 @@ gf_worm_state_transition(xlator_t *this, gf_boolean_t fop_with_fd,
{
int op_errno = EROFS;
int ret = -1;
+ time_t now = 0;
uint64_t com_period = 0;
uint64_t start_time = 0;
dict_t *dict = NULL;
@@ -340,8 +338,10 @@ gf_worm_state_transition(xlator_t *this, gf_boolean_t fop_with_fd,
goto out;
}
- if (ret == -1 && (time(NULL) - start_time) >= com_period) {
- if ((time(NULL) - stbuf.ia_mtime) >= com_period) {
+ now = gf_time();
+
+ if (ret == -1 && (now - start_time) >= com_period) {
+ if ((now - stbuf.ia_mtime) >= com_period) {
ret = worm_set_state(this, fop_with_fd, file_ptr, &reten_state,
&stbuf);
if (ret) {
@@ -355,10 +355,10 @@ gf_worm_state_transition(xlator_t *this, gf_boolean_t fop_with_fd,
op_errno = 0;
goto out;
}
- } else if (ret == -1 && (time(NULL) - start_time) < com_period) {
+ } else if (ret == -1 && (now - start_time) < com_period) {
op_errno = 0;
goto out;
- } else if (reten_state.retain && ((time(NULL) >= stbuf.ia_atime))) {
+ } else if (reten_state.retain && ((now >= stbuf.ia_atime))) {
gf_worm_state_lookup(this, fop_with_fd, file_ptr, &reten_state, &stbuf);
}
if (reten_state.worm && !reten_state.retain && priv->worm_files_deletable &&
diff --git a/xlators/features/read-only/src/worm.c b/xlators/features/read-only/src/worm.c
index db128b75196..1cc5526d5cd 100644
--- a/xlators/features/read-only/src/worm.c
+++ b/xlators/features/read-only/src/worm.c
@@ -7,12 +7,12 @@
later), or the GNU General Public License, version 2 (GPLv2), in all
cases as published by the Free Software Foundation.
*/
-#include "xlator.h"
-#include "defaults.h"
+#include <glusterfs/xlator.h>
+#include <glusterfs/defaults.h>
#include "read-only-common.h"
#include "read-only-mem-types.h"
#include "read-only.h"
-#include "syncop.h"
+#include <glusterfs/syncop.h>
#include "worm-helper.h"
int32_t
@@ -292,6 +292,12 @@ worm_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
goto out;
}
}
+ reten_state.ret_period = reten_state.ret_period + stbuf->ia_atime -
+ stpre.ia_atime;
+ ret = gf_worm_set_xattr(this, &reten_state, _gf_false, loc);
+ if (ret) {
+ goto out;
+ }
stbuf->ia_mtime = stpre.ia_mtime;
}
}
@@ -372,6 +378,13 @@ worm_fsetattr(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iatt *stbuf,
goto out;
}
}
+ reten_state.ret_period = reten_state.ret_period + stbuf->ia_atime -
+ stpre.ia_atime;
+ ret = gf_worm_set_xattr(this, &reten_state, _gf_true, fd);
+ if (ret) {
+ goto out;
+ }
+
stbuf->ia_mtime = stpre.ia_mtime;
}
}
@@ -427,29 +440,22 @@ worm_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
{
int ret = 0;
read_only_priv_t *priv = NULL;
- dict_t *dict = NULL;
+ // In case of an error, exit early because fd can be NULL and this would
+ // cause a segfault when performing fsetxattr. We explicitly
+ // unwind to avoid future problems.
+ if (op_ret < 0) {
+ goto out;
+ }
priv = this->private;
GF_ASSERT(priv);
if (priv->worm_file) {
- dict = dict_new();
- if (!dict) {
- gf_log(this->name, GF_LOG_ERROR,
- "Error creating the "
- "dict");
- goto out;
- }
- ret = dict_set_int8(dict, "trusted.worm_file", 1);
+ ret = fd_ctx_set(fd, this, 1);
if (ret) {
gf_log(this->name, GF_LOG_ERROR,
- "Error in setting "
- "the dict");
- goto out;
- }
- ret = syncop_fsetxattr(this, fd, dict, 0, NULL, NULL);
- if (ret) {
- gf_log(this->name, GF_LOG_ERROR, "Error setting xattr");
- goto out;
+ "Failed to set the fd ctx "
+ "for gfid:%s . Worm feature may not work for the gfid",
+ uuid_utoa(inode->gfid));
}
ret = worm_init_state(this, _gf_true, fd);
if (ret) {
@@ -460,8 +466,6 @@ worm_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
out:
STACK_UNWIND_STRICT(create, frame, op_ret, op_errno, fd, inode, buf,
preparent, postparent, xdata);
- if (dict)
- dict_unref(dict);
return ret;
}
@@ -475,11 +479,21 @@ worm_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
return 0;
}
+static void
+set_reten_mode(read_only_priv_t *priv, char *reten_mode)
+{
+ if (strcmp(reten_mode, "relax") == 0)
+ priv->reten_mode = 0;
+ else
+ priv->reten_mode = 1;
+}
+
int32_t
init(xlator_t *this)
{
int ret = -1;
read_only_priv_t *priv = NULL;
+ char *reten_mode = NULL;
if (!this->children || this->children->next) {
gf_log(this->name, GF_LOG_ERROR,
@@ -509,9 +523,10 @@ init(xlator_t *this)
GF_OPTION_INIT("worm", priv->readonly_or_worm_enabled, bool, out);
GF_OPTION_INIT("worm-file-level", priv->worm_file, bool, out);
- GF_OPTION_INIT("default-retention-period", priv->reten_period, uint64, out);
- GF_OPTION_INIT("auto-commit-period", priv->com_period, uint64, out);
- GF_OPTION_INIT("retention-mode", priv->reten_mode, str, out);
+ GF_OPTION_INIT("default-retention-period", priv->reten_period, int64, out);
+ GF_OPTION_INIT("auto-commit-period", priv->com_period, int64, out);
+ GF_OPTION_INIT("retention-mode", reten_mode, str, out);
+ set_reten_mode(priv, reten_mode);
GF_OPTION_INIT("worm-files-deletable", priv->worm_files_deletable, bool,
out);
@@ -524,6 +539,7 @@ int
reconfigure(xlator_t *this, dict_t *options)
{
read_only_priv_t *priv = NULL;
+ char *reten_mode = NULL;
int ret = -1;
priv = this->private;
@@ -533,9 +549,10 @@ reconfigure(xlator_t *this, dict_t *options)
out);
GF_OPTION_RECONF("worm-file-level", priv->worm_file, options, bool, out);
GF_OPTION_RECONF("default-retention-period", priv->reten_period, options,
- uint64, out);
- GF_OPTION_RECONF("retention-mode", priv->reten_mode, options, str, out);
- GF_OPTION_RECONF("auto-commit-period", priv->com_period, options, uint64,
+ int64, out);
+ GF_OPTION_RECONF("retention-mode", reten_mode, options, str, out);
+ set_reten_mode(priv, reten_mode);
+ GF_OPTION_RECONF("auto-commit-period", priv->com_period, options, int64,
out);
GF_OPTION_RECONF("worm-files-deletable", priv->worm_files_deletable,
options, bool, out);
@@ -556,6 +573,7 @@ fini(xlator_t *this)
mem_put(priv);
this->private = NULL;
mem_pool_destroy(this->local_pool);
+ this->local_pool = NULL;
out:
return;
}
@@ -583,7 +601,62 @@ struct xlator_fops fops = {
.lk = ro_lk,
};
-struct xlator_cbks cbks;
+int32_t
+worm_release(xlator_t *this, fd_t *fd)
+{
+ dict_t *dict = NULL;
+ int ret = -1;
+ dict = dict_new();
+ uint64_t value = 0;
+ loc_t loc = {
+ 0,
+ };
+ read_only_priv_t *priv = NULL;
+ priv = this->private;
+
+ if (priv->worm_file) {
+ if (!dict) {
+ gf_log(this->name, GF_LOG_ERROR, "Error creating the dict");
+ goto out;
+ }
+
+ ret = fd_ctx_get(fd, this, &value);
+ if (ret) {
+ gf_log(this->name, GF_LOG_DEBUG, "Failed to get the fd ctx");
+ }
+ if (!value) {
+ goto out;
+ }
+
+ ret = dict_set_int8(dict, "trusted.worm_file", 1);
+ if (ret) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "Error in setting "
+ "the dict");
+ goto out;
+ }
+
+ loc.inode = inode_ref(fd->inode);
+ gf_uuid_copy(loc.gfid, fd->inode->gfid);
+ ret = syncop_setxattr(this, &loc, dict, 0, NULL, NULL);
+ if (ret) {
+ gf_log(this->name, GF_LOG_ERROR, "Error setting xattr");
+ goto out;
+ }
+
+ gf_worm_state_transition(this, _gf_false, &loc, GF_FOP_WRITE);
+ }
+
+out:
+ loc_wipe(&loc);
+ if (dict)
+ dict_unref(dict);
+ return 0;
+}
+
+struct xlator_cbks cbks = {
+ .release = worm_release,
+};
struct volume_options options[] = {
{.key = {"worm"},
@@ -634,3 +707,16 @@ struct volume_options options[] = {
.description = "Auto commit period for the files."},
{.key = {NULL}},
};
+
+xlator_api_t xlator_api = {
+ .init = init,
+ .fini = fini,
+ .reconfigure = reconfigure,
+ .mem_acct_init = mem_acct_init,
+ .op_version = {1}, /* Present from the initial version */
+ .fops = &fops,
+ .cbks = &cbks,
+ .options = options,
+ .identifier = "worm",
+ .category = GF_TECH_PREVIEW,
+};
diff --git a/xlators/features/sdfs/src/sdfs-messages.h b/xlators/features/sdfs/src/sdfs-messages.h
index cf866c8512a..3053efa8935 100644
--- a/xlators/features/sdfs/src/sdfs-messages.h
+++ b/xlators/features/sdfs/src/sdfs-messages.h
@@ -11,7 +11,7 @@
#ifndef _DFS_MESSAGES_H_
#define _DFS_MESSAGES_H_
-#include "glfs-message-id.h"
+#include <glusterfs/glfs-message-id.h>
/* file bit-rot-bitd-messages.h
* brief SDFS log-message IDs and their descriptions
diff --git a/xlators/features/sdfs/src/sdfs.c b/xlators/features/sdfs/src/sdfs.c
index 5dbe0653cbc..aaf13f0852e 100644
--- a/xlators/features/sdfs/src/sdfs.c
+++ b/xlators/features/sdfs/src/sdfs.c
@@ -139,6 +139,8 @@ sdfs_get_new_frame_common(call_frame_t *frame, call_frame_t **new_frame)
}
local->main_frame = frame;
+ /*Set unique lk-owner for the fop*/
+ set_lk_owner_from_ptr(&(*new_frame)->root->lk_owner, (*new_frame)->root);
ret = 0;
err:
@@ -175,9 +177,10 @@ sdfs_get_new_frame(call_frame_t *frame, loc_t *loc, call_frame_t **new_frame)
ret = 0;
err:
- if ((ret < 0) && (*new_frame != NULL)) {
+ if (ret && (*new_frame)) {
SDFS_STACK_DESTROY((*new_frame));
*new_frame = NULL;
+ ret = -1;
}
return ret;
@@ -868,6 +871,8 @@ sdfs_link(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
op_errno = ENOMEM;
goto err;
}
+ /*Set unique lk-owner for the fop*/
+ set_lk_owner_from_ptr(&new_frame->root->lk_owner, new_frame->root);
gf_client_ref(client);
new_frame->root->client = client;
@@ -1121,6 +1126,8 @@ sdfs_rename(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
op_errno = ENOMEM;
goto err;
}
+ /*Set unique lk-owner for the fop*/
+ set_lk_owner_from_ptr(&new_frame->root->lk_owner, new_frame->root);
gf_client_ref(client);
new_frame->root->client = client;
@@ -1425,12 +1432,12 @@ out:
return ret;
}
-int
+void
fini(xlator_t *this)
{
mem_pool_destroy(this->local_pool);
-
- return 0;
+ this->local_pool = NULL;
+ return;
}
struct xlator_fops fops = {
@@ -1451,10 +1458,22 @@ struct xlator_cbks cbks;
struct volume_options options[] = {
{.key = {"pass-through"},
.type = GF_OPTION_TYPE_BOOL,
- .default_value = "false",
+ .default_value = "true",
.op_version = {GD_OP_VERSION_4_1_0},
.flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC | OPT_FLAG_CLIENT_OPT,
.tags = {"sdfs"},
.description = "Enable/Disable dentry serialize functionality"},
{.key = {NULL}},
};
+
+xlator_api_t xlator_api = {
+ .init = init,
+ .fini = fini,
+ .reconfigure = reconfigure,
+ .op_version = {GD_OP_VERSION_4_0_0},
+ .fops = &fops,
+ .cbks = &cbks,
+ .options = options,
+ .identifier = "sdfs",
+ .category = GF_TECH_PREVIEW,
+};
diff --git a/xlators/features/sdfs/src/sdfs.h b/xlators/features/sdfs/src/sdfs.h
index 986d7c2731c..dded5a2d7fc 100644
--- a/xlators/features/sdfs/src/sdfs.h
+++ b/xlators/features/sdfs/src/sdfs.h
@@ -8,10 +8,10 @@
cases as published by the Free Software Foundation.
*/
-#include "xlator.h"
-#include "call-stub.h"
+#include <glusterfs/xlator.h>
+#include <glusterfs/call-stub.h>
#include "sdfs-messages.h"
-#include "atomic.h"
+#include <glusterfs/atomic.h>
#define SDFS_LOCK_COUNT_MAX 2
diff --git a/xlators/features/selinux/src/selinux-mem-types.h b/xlators/features/selinux/src/selinux-mem-types.h
index a8c544fba52..553e59e5a9d 100644
--- a/xlators/features/selinux/src/selinux-mem-types.h
+++ b/xlators/features/selinux/src/selinux-mem-types.h
@@ -10,7 +10,7 @@
#ifndef __SELINUX_MEM_TYPES_H__
#define __SELINUX_MEM_TYPES_H__
-#include "mem-types.h"
+#include <glusterfs/mem-types.h>
enum gf_selinux_mem_types_ {
gf_selinux_mt_selinux_priv_t = gf_common_mt_end + 1,
diff --git a/xlators/features/selinux/src/selinux-messages.h b/xlators/features/selinux/src/selinux-messages.h
index 1f5739d8dc7..f49a54f956c 100644
--- a/xlators/features/selinux/src/selinux-messages.h
+++ b/xlators/features/selinux/src/selinux-messages.h
@@ -11,7 +11,7 @@
#ifndef _SELINUX_MESSAGES_H__
#define _SELINUX_MESSAGES_H__
-#include "glfs-message-id.h"
+#include <glusterfs/glfs-message-id.h>
/* To add new message IDs, append new identifiers at the end of the list.
*
diff --git a/xlators/features/selinux/src/selinux.c b/xlators/features/selinux/src/selinux.c
index 91e74d1a3fc..9b1b4b55e1a 100644
--- a/xlators/features/selinux/src/selinux.c
+++ b/xlators/features/selinux/src/selinux.c
@@ -8,12 +8,12 @@
cases as published by the Free Software Foundation.
*/
-#include "xlator.h"
+#include <glusterfs/xlator.h>
#include "selinux.h"
#include "selinux-messages.h"
#include "selinux-mem-types.h"
-#include "compat-errno.h"
+#include <glusterfs/compat-errno.h>
static int
selinux_fgetxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
@@ -234,7 +234,6 @@ init(xlator_t *this)
priv = GF_CALLOC(1, sizeof(*priv), gf_selinux_mt_selinux_priv_t);
if (!priv) {
gf_log(this->name, GF_LOG_ERROR, "out of memory");
- ret = ENOMEM;
goto out;
}
@@ -242,7 +241,6 @@ init(xlator_t *this)
this->local_pool = mem_pool_new(selinux_priv_t, 64);
if (!this->local_pool) {
- ret = -1;
gf_msg(this->name, GF_LOG_ERROR, ENOMEM, SL_MSG_ENOMEM,
"Failed to create local_t's memory pool");
goto out;
@@ -252,10 +250,9 @@ init(xlator_t *this)
ret = 0;
out:
if (ret) {
- if (priv) {
- GF_FREE(priv);
- }
+ GF_FREE(priv);
mem_pool_destroy(this->local_pool);
+ this->local_pool = NULL;
}
return ret;
}
@@ -284,6 +281,7 @@ fini(xlator_t *this)
GF_FREE(priv);
mem_pool_destroy(this->local_pool);
+ this->local_pool = NULL;
return;
}
@@ -310,3 +308,16 @@ struct volume_options options[] = {
{
.key = {NULL},
}};
+
+xlator_api_t xlator_api = {
+ .init = init,
+ .fini = fini,
+ .reconfigure = reconfigure,
+ .mem_acct_init = mem_acct_init,
+ .op_version = {1}, /* Present from the initial version */
+ .fops = &fops,
+ .cbks = &cbks,
+ .options = options,
+ .identifier = "selinux",
+ .category = GF_MAINTAINED,
+};
diff --git a/xlators/features/selinux/src/selinux.h b/xlators/features/selinux/src/selinux.h
index 787bff348f0..1bbdad3bb36 100644
--- a/xlators/features/selinux/src/selinux.h
+++ b/xlators/features/selinux/src/selinux.h
@@ -10,7 +10,7 @@
#ifndef __SELINUX_H__
#define __SELINUX_H__
-#include "common-utils.h"
+#include <glusterfs/common-utils.h>
#define SELINUX_XATTR "security.selinux"
#define SELINUX_GLUSTER_XATTR "trusted.glusterfs.selinux"
diff --git a/xlators/features/shard/src/shard-mem-types.h b/xlators/features/shard/src/shard-mem-types.h
index 39a57ba6fd0..1fe7e2e2798 100644
--- a/xlators/features/shard/src/shard-mem-types.h
+++ b/xlators/features/shard/src/shard-mem-types.h
@@ -10,7 +10,7 @@
#ifndef __SHARD_MEM_TYPES_H__
#define __SHARD_MEM_TYPES_H__
-#include "mem-types.h"
+#include <glusterfs/mem-types.h>
enum gf_shard_mem_types_ {
gf_shard_mt_priv_t = gf_common_mt_end + 1,
diff --git a/xlators/features/shard/src/shard-messages.h b/xlators/features/shard/src/shard-messages.h
index 89a96709219..2d0867eb136 100644
--- a/xlators/features/shard/src/shard-messages.h
+++ b/xlators/features/shard/src/shard-messages.h
@@ -11,7 +11,7 @@
#ifndef _SHARD_MESSAGES_H_
#define _SHARD_MESSAGES_H_
-#include "glfs-message-id.h"
+#include <glusterfs/glfs-message-id.h>
/* To add new message IDs, append new identifiers at the end of the list.
*
diff --git a/xlators/features/shard/src/shard.c b/xlators/features/shard/src/shard.c
index 5ffeaa63628..e5f93063943 100644
--- a/xlators/features/shard/src/shard.c
+++ b/xlators/features/shard/src/shard.c
@@ -12,9 +12,9 @@
#include "shard.h"
#include "shard-mem-types.h"
-#include "byte-order.h"
-#include "defaults.h"
-#include "statedump.h"
+#include <glusterfs/byte-order.h>
+#include <glusterfs/defaults.h>
+#include <glusterfs/statedump.h>
static gf_boolean_t
__is_shard_dir(uuid_t gfid)
@@ -80,7 +80,8 @@ __shard_inode_ctx_get(inode_t *inode, xlator_t *this, shard_inode_ctx_t **ctx)
INIT_LIST_HEAD(&ctx_p->ilist);
INIT_LIST_HEAD(&ctx_p->to_fsync_list);
- ret = __inode_ctx_set(inode, this, (uint64_t *)&ctx_p);
+ ctx_uint = (uint64_t)(uintptr_t)ctx_p;
+ ret = __inode_ctx_set(inode, this, &ctx_uint);
if (ret < 0) {
GF_FREE(ctx_p);
return ret;
@@ -273,6 +274,7 @@ shard_inode_ctx_add_to_fsync_list(inode_t *base_inode, xlator_t *this,
* of the to_fsync_list.
*/
inode_ref(base_inode);
+ inode_ref(shard_inode);
LOCK(&base_inode->lock);
LOCK(&shard_inode->lock);
@@ -286,8 +288,10 @@ shard_inode_ctx_add_to_fsync_list(inode_t *base_inode, xlator_t *this,
/* Unref the base inode corresponding to the ref above, if the shard is
* found to be already part of the fsync list.
*/
- if (ret != 0)
+ if (ret != 0) {
inode_unref(base_inode);
+ inode_unref(shard_inode);
+ }
return ret;
}
@@ -509,6 +513,9 @@ shard_local_wipe(shard_local_t *local)
loc_wipe(&local->int_entrylk.loc);
loc_wipe(&local->newloc);
+ if (local->name)
+ GF_FREE(local->name);
+
if (local->int_entrylk.basename)
GF_FREE(local->int_entrylk.basename);
if (local->fd)
@@ -686,8 +693,7 @@ __shard_update_shards_inode_list(inode_t *linked_inode, xlator_t *this,
ctx->block_num = block_num;
list_add_tail(&ctx->ilist, &priv->ilist_head);
priv->inode_count++;
- if (base_inode)
- ctx->base_inode = inode_ref(base_inode);
+ ctx->base_inode = inode_ref(base_inode);
} else {
/*If on the other hand there is no available slot for this inode
* in the list, delete the lru inode from the head of the list,
@@ -734,6 +740,10 @@ __shard_update_shards_inode_list(inode_t *linked_inode, xlator_t *this,
inode_unlink(lru_inode, priv->dot_shard_inode, block_bname);
inode_forget(lru_inode, 0);
} else {
+ /* The following unref corresponds to the ref
+ * held when the shard was added to fsync list.
+ */
+ inode_unref(lru_inode);
fsync_inode = lru_inode;
if (lru_base_inode)
inode_unref(lru_base_inode);
@@ -758,8 +768,7 @@ __shard_update_shards_inode_list(inode_t *linked_inode, xlator_t *this,
else
gf_uuid_copy(ctx->base_gfid, gfid);
ctx->block_num = block_num;
- if (base_inode)
- ctx->base_inode = inode_ref(base_inode);
+ ctx->base_inode = inode_ref(base_inode);
list_add_tail(&ctx->ilist, &priv->ilist_head);
}
} else {
@@ -879,26 +888,34 @@ int
shard_common_inode_write_success_unwind(glusterfs_fop_t fop,
call_frame_t *frame, int32_t op_ret)
{
- shard_local_t *local = NULL;
+ shard_local_t *local = frame->local;
- local = frame->local;
+ /* The 3 variables below are required because the SHARD_STACK_UNWIND()
+ macro contains a check for local being NULL. Many static analyzers
+ therefore trace the code assuming a possible (local == NULL) case
+ and complain about the lines below. Handling it this way avoids
+ the warnings. */
+
+ struct iatt *prebuf = ((local) ? &local->prebuf : NULL);
+ struct iatt *postbuf = ((local) ? &local->postbuf : NULL);
+ dict_t *xattr_rsp = ((local) ? local->xattr_rsp : NULL);
switch (fop) {
case GF_FOP_WRITE:
- SHARD_STACK_UNWIND(writev, frame, op_ret, 0, &local->prebuf,
- &local->postbuf, local->xattr_rsp);
+ SHARD_STACK_UNWIND(writev, frame, op_ret, 0, prebuf, postbuf,
+ xattr_rsp);
break;
case GF_FOP_FALLOCATE:
- SHARD_STACK_UNWIND(fallocate, frame, op_ret, 0, &local->prebuf,
- &local->postbuf, local->xattr_rsp);
+ SHARD_STACK_UNWIND(fallocate, frame, op_ret, 0, prebuf, postbuf,
+ xattr_rsp);
break;
case GF_FOP_ZEROFILL:
- SHARD_STACK_UNWIND(zerofill, frame, op_ret, 0, &local->prebuf,
- &local->postbuf, local->xattr_rsp);
+ SHARD_STACK_UNWIND(zerofill, frame, op_ret, 0, prebuf, postbuf,
+ xattr_rsp);
break;
case GF_FOP_DISCARD:
- SHARD_STACK_UNWIND(discard, frame, op_ret, 0, &local->prebuf,
- &local->postbuf, local->xattr_rsp);
+ SHARD_STACK_UNWIND(discard, frame, op_ret, 0, prebuf, postbuf,
+ xattr_rsp);
break;
default:
gf_msg(THIS->name, GF_LOG_WARNING, 0, SHARD_MSG_INVALID_FOP,
@@ -987,6 +1004,10 @@ shard_initiate_evicted_inode_fsync(xlator_t *this, inode_t *inode)
}
int
+shard_common_inode_write_post_lookup_shards_handler(call_frame_t *frame,
+ xlator_t *this);
+
+int
shard_common_resolve_shards(call_frame_t *frame, xlator_t *this,
shard_post_resolve_fop_handler_t post_res_handler)
{
@@ -1003,21 +1024,47 @@ shard_common_resolve_shards(call_frame_t *frame, xlator_t *this,
inode_t *fsync_inode = NULL;
shard_priv_t *priv = NULL;
shard_local_t *local = NULL;
+ uint64_t resolve_count = 0;
priv = this->private;
local = frame->local;
local->call_count = 0;
shard_idx_iter = local->first_block;
res_inode = local->resolver_base_inode;
+
+ if ((local->op_ret < 0) || (local->resolve_not))
+ goto out;
+
+ /* If this prealloc FOP is for fresh file creation, then the size of the
+ * file will be 0. Then there will be no shards associated with this file.
+ * So we can skip the lookup process for the shards which do not exist
+ * and directly issue mknod to create shards.
+ *
+ * In case the prealloc fop is to extend the preallocated file to bigger
+ * size then just lookup and populate inodes of existing shards and
+ * update the create count
+ */
+ if (local->fop == GF_FOP_FALLOCATE) {
+ if (!local->prebuf.ia_size) {
+ local->inode_list[0] = inode_ref(res_inode);
+ local->create_count = local->last_block;
+ shard_common_inode_write_post_lookup_shards_handler(frame, this);
+ return 0;
+ }
+ if (local->prebuf.ia_size < local->total_size)
+ local->create_count = local->last_block -
+ ((local->prebuf.ia_size - 1) /
+ local->block_size);
+ }
+
+ resolve_count = local->last_block - local->create_count;
+
if (res_inode)
gf_uuid_copy(gfid, res_inode->gfid);
else
gf_uuid_copy(gfid, local->base_gfid);
- if ((local->op_ret < 0) || (local->resolve_not))
- goto out;
-
- while (shard_idx_iter <= local->last_block) {
+ while (shard_idx_iter <= resolve_count) {
i++;
if (shard_idx_iter == 0) {
local->inode_list[i] = inode_ref(res_inode);
@@ -1130,6 +1177,7 @@ shard_update_file_size(call_frame_t *frame, xlator_t *this, fd_t *fd,
{
int ret = -1;
int64_t *size_attr = NULL;
+ int64_t delta_blocks = 0;
inode_t *inode = NULL;
shard_local_t *local = NULL;
dict_t *xattr_req = NULL;
@@ -1151,13 +1199,13 @@ shard_update_file_size(call_frame_t *frame, xlator_t *this, fd_t *fd,
/* If both size and block count have not changed, then skip the xattrop.
*/
- if ((local->delta_size + local->hole_size == 0) &&
- (local->delta_blocks == 0)) {
+ delta_blocks = GF_ATOMIC_GET(local->delta_blocks);
+ if ((local->delta_size + local->hole_size == 0) && (delta_blocks == 0)) {
goto out;
}
ret = shard_set_size_attrs(local->delta_size + local->hole_size,
- local->delta_blocks, &size_attr);
+ delta_blocks, &size_attr);
if (ret) {
gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_SIZE_SET_FAILED,
"Failed to set size attrs for %s", uuid_utoa(inode->gfid));
@@ -1461,16 +1509,45 @@ int
shard_start_background_deletion(xlator_t *this)
{
int ret = 0;
+ gf_boolean_t i_cleanup = _gf_true;
+ shard_priv_t *priv = NULL;
call_frame_t *cleanup_frame = NULL;
+ priv = this->private;
+
+ LOCK(&priv->lock);
+ {
+ switch (priv->bg_del_state) {
+ case SHARD_BG_DELETION_NONE:
+ i_cleanup = _gf_true;
+ priv->bg_del_state = SHARD_BG_DELETION_LAUNCHING;
+ break;
+ case SHARD_BG_DELETION_LAUNCHING:
+ i_cleanup = _gf_false;
+ break;
+ case SHARD_BG_DELETION_IN_PROGRESS:
+ priv->bg_del_state = SHARD_BG_DELETION_LAUNCHING;
+ i_cleanup = _gf_false;
+ break;
+ default:
+ break;
+ }
+ }
+ UNLOCK(&priv->lock);
+ if (!i_cleanup)
+ return 0;
+
cleanup_frame = create_frame(this, this->ctx->pool);
if (!cleanup_frame) {
gf_msg(this->name, GF_LOG_WARNING, ENOMEM, SHARD_MSG_MEMALLOC_FAILED,
"Failed to create "
"new frame to delete shards");
- return -ENOMEM;
+ ret = -ENOMEM;
+ goto err;
}
+ set_lk_owner_from_ptr(&cleanup_frame->root->lk_owner, cleanup_frame->root);
+
ret = synctask_new(this->ctx->env, shard_delete_shards,
shard_delete_shards_cbk, cleanup_frame, cleanup_frame);
if (ret < 0) {
@@ -1479,7 +1556,16 @@ shard_start_background_deletion(xlator_t *this)
"failed to create task to do background "
"cleanup of shards");
STACK_DESTROY(cleanup_frame->root);
+ goto err;
}
+ return 0;
+
+err:
+ LOCK(&priv->lock);
+ {
+ priv->bg_del_state = SHARD_BG_DELETION_NONE;
+ }
+ UNLOCK(&priv->lock);
return ret;
}
@@ -1488,7 +1574,7 @@ shard_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, inode_t *inode,
struct iatt *buf, dict_t *xdata, struct iatt *postparent)
{
- int ret = 0;
+ int ret = -1;
shard_priv_t *priv = NULL;
gf_boolean_t i_start_cleanup = _gf_false;
@@ -1521,23 +1607,25 @@ shard_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
LOCK(&priv->lock);
{
- if (priv->first_lookup == SHARD_FIRST_LOOKUP_PENDING) {
- priv->first_lookup = SHARD_FIRST_LOOKUP_IN_PROGRESS;
+ if (priv->first_lookup_done == _gf_false) {
+ priv->first_lookup_done = _gf_true;
i_start_cleanup = _gf_true;
}
}
UNLOCK(&priv->lock);
- if (i_start_cleanup) {
- ret = shard_start_background_deletion(this);
- if (ret) {
- LOCK(&priv->lock);
- {
- priv->first_lookup = SHARD_FIRST_LOOKUP_PENDING;
- }
- UNLOCK(&priv->lock);
+ if (!i_start_cleanup)
+ goto unwind;
+
+ ret = shard_start_background_deletion(this);
+ if (ret < 0) {
+ LOCK(&priv->lock);
+ {
+ priv->first_lookup_done = _gf_false;
}
+ UNLOCK(&priv->lock);
}
+
unwind:
SHARD_STACK_UNWIND(lookup, frame, op_ret, op_errno, inode, buf, xdata,
postparent);
@@ -1553,7 +1641,8 @@ shard_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr_req)
shard_local_t *local = NULL;
this->itable = loc->inode->table;
- if (frame->root->pid != GF_CLIENT_PID_GSYNCD) {
+ if ((frame->root->pid != GF_CLIENT_PID_GSYNCD) &&
+ (frame->root->pid != GF_CLIENT_PID_GLFS_HEAL)) {
SHARD_ENTRY_FOP_CHECK(loc, op_errno, err);
}
@@ -1603,26 +1692,24 @@ err:
}
int
-shard_lookup_base_file_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct iatt *buf, dict_t *xdata,
- struct iatt *postparent)
+shard_set_iattr_invoke_post_handler(call_frame_t *frame, xlator_t *this,
+ inode_t *inode, int32_t op_ret,
+ int32_t op_errno, struct iatt *buf,
+ dict_t *xdata)
{
int ret = -1;
int32_t mask = SHARD_INODE_WRITE_MASK;
- shard_local_t *local = NULL;
+ shard_local_t *local = frame->local;
shard_inode_ctx_t ctx = {
0,
};
- local = frame->local;
-
if (op_ret < 0) {
gf_msg(this->name, GF_LOG_ERROR, op_errno,
SHARD_MSG_BASE_FILE_LOOKUP_FAILED,
"Lookup on base file"
" failed : %s",
- loc_gfid_utoa(&(local->loc)));
+ uuid_utoa(inode->gfid));
local->op_ret = op_ret;
local->op_errno = op_errno;
goto unwind;
@@ -1656,18 +1743,57 @@ unwind:
}
int
-shard_lookup_base_file(call_frame_t *frame, xlator_t *this, loc_t *loc,
- shard_post_fop_handler_t handler)
+shard_fstat_base_file_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *buf,
+ dict_t *xdata)
+{
+ shard_local_t *local = frame->local;
+
+ shard_set_iattr_invoke_post_handler(frame, this, local->fd->inode, op_ret,
+ op_errno, buf, xdata);
+ return 0;
+}
+
+int
+shard_lookup_base_file_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, dict_t *xdata,
+ struct iatt *postparent)
+{
+ /* In case of op_ret < 0, the inode passed to this function will be NULL
+ (e.g. when op_errno = ENOENT). So refer to the prefilled inode data
+ which is part of local.
+ Note: Reassigning/overriding the inode passed to this cbk with the inode
+ which is part of *struct shard_local_t* won't cause any issue, as
+ both inodes have the same reference/address as the inode passed. */
+ inode = ((shard_local_t *)frame->local)->loc.inode;
+
+ shard_set_iattr_invoke_post_handler(frame, this, inode, op_ret, op_errno,
+ buf, xdata);
+ return 0;
+}
+
+/* This function decides whether to make file based lookup or
+ * fd based lookup (fstat) depending on the 3rd and 4th arg.
+ * If fd != NULL and loc == NULL then call is for fstat
+ * If fd == NULL and loc != NULL then call is for file based
+ * lookup. Please pass args based on the requirement.
+ */
+int
+shard_refresh_base_file(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ fd_t *fd, shard_post_fop_handler_t handler)
{
int ret = -1;
+ inode_t *inode = NULL;
shard_local_t *local = NULL;
dict_t *xattr_req = NULL;
gf_boolean_t need_refresh = _gf_false;
local = frame->local;
local->handler = handler;
+ inode = fd ? fd->inode : loc->inode;
- ret = shard_inode_ctx_fill_iatt_from_cache(loc->inode, this, &local->prebuf,
+ ret = shard_inode_ctx_fill_iatt_from_cache(inode, this, &local->prebuf,
&need_refresh);
/* By this time, inode ctx should have been created either in create,
* mknod, readdirp or lookup. If not it is a bug!
@@ -1676,7 +1802,7 @@ shard_lookup_base_file(call_frame_t *frame, xlator_t *this, loc_t *loc,
gf_msg_debug(this->name, 0,
"Skipping lookup on base file: %s"
"Serving prebuf off the inode ctx cache",
- uuid_utoa(loc->gfid));
+ uuid_utoa(inode->gfid));
goto out;
}
@@ -1687,10 +1813,14 @@ shard_lookup_base_file(call_frame_t *frame, xlator_t *this, loc_t *loc,
goto out;
}
- SHARD_MD_READ_FOP_INIT_REQ_DICT(this, xattr_req, loc->gfid, local, out);
+ SHARD_MD_READ_FOP_INIT_REQ_DICT(this, xattr_req, inode->gfid, local, out);
- STACK_WIND(frame, shard_lookup_base_file_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->lookup, loc, xattr_req);
+ if (fd)
+ STACK_WIND(frame, shard_fstat_base_file_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fstat, fd, xattr_req);
+ else
+ STACK_WIND(frame, shard_lookup_base_file_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->lookup, loc, xattr_req);
dict_unref(xattr_req);
return 0;
@@ -1902,6 +2032,7 @@ shard_truncate_last_shard_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
dict_t *xdata)
{
inode_t *inode = NULL;
+ int64_t delta_blocks = 0;
shard_local_t *local = NULL;
local = frame->local;
@@ -1922,14 +2053,15 @@ shard_truncate_last_shard_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
}
local->postbuf.ia_size = local->offset;
- local->postbuf.ia_blocks -= (prebuf->ia_blocks - postbuf->ia_blocks);
/* Let the delta be negative. We want xattrop to do subtraction */
local->delta_size = local->postbuf.ia_size - local->prebuf.ia_size;
- local->delta_blocks = postbuf->ia_blocks - prebuf->ia_blocks;
+ delta_blocks = GF_ATOMIC_ADD(local->delta_blocks,
+ postbuf->ia_blocks - prebuf->ia_blocks);
+ GF_ASSERT(delta_blocks <= 0);
+ local->postbuf.ia_blocks += delta_blocks;
local->hole_size = 0;
- shard_inode_ctx_set(inode, this, postbuf, 0, SHARD_MASK_TIMES);
-
+ shard_inode_ctx_set(inode, this, &local->postbuf, 0, SHARD_MASK_TIMES);
shard_update_file_size(frame, this, NULL, &local->loc,
shard_post_update_size_truncate_handler);
return 0;
@@ -1957,10 +2089,9 @@ shard_truncate_last_shard(call_frame_t *frame, xlator_t *this, inode_t *inode)
*/
if (!inode) {
gf_msg_debug(this->name, 0,
- "Last shard to be truncated absent"
- " in backend: %s. Directly proceeding to update "
- "file size",
- uuid_utoa(inode->gfid));
+ "Last shard to be truncated absent in backend: %" PRIu64
+ " of gfid %s. Directly proceeding to update file size",
+ local->first_block, uuid_utoa(local->loc.inode->gfid));
shard_update_file_size(frame, this, NULL, &local->loc,
shard_post_update_size_truncate_handler);
return 0;
@@ -1989,8 +2120,10 @@ shard_truncate_htol_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
struct iatt *preparent, struct iatt *postparent,
dict_t *xdata)
{
+ int ret = 0;
int call_count = 0;
int shard_block_num = (long)cookie;
+ uint64_t block_count = 0;
shard_local_t *local = NULL;
local = frame->local;
@@ -2000,6 +2133,16 @@ shard_truncate_htol_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
local->op_errno = op_errno;
goto done;
}
+ ret = dict_get_uint64(xdata, GF_GET_FILE_BLOCK_COUNT, &block_count);
+ if (!ret) {
+ GF_ATOMIC_SUB(local->delta_blocks, block_count);
+ } else {
+ /* dict_get failed possibly due to a heterogeneous cluster? */
+ gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED,
+ "Failed to get key %s from dict during truncate of gfid %s",
+ GF_GET_FILE_BLOCK_COUNT,
+ uuid_utoa(local->resolver_base_inode->gfid));
+ }
shard_unlink_block_inode(local, shard_block_num);
done:
@@ -2029,6 +2172,7 @@ shard_truncate_htol(call_frame_t *frame, xlator_t *this, inode_t *inode)
gf_boolean_t wind_failed = _gf_false;
shard_local_t *local = NULL;
shard_priv_t *priv = NULL;
+ dict_t *xdata_req = NULL;
local = frame->local;
priv = this->private;
@@ -2056,7 +2200,7 @@ shard_truncate_htol(call_frame_t *frame, xlator_t *this, inode_t *inode)
local->postbuf.ia_size = local->offset;
local->postbuf.ia_blocks = local->prebuf.ia_blocks;
local->delta_size = local->postbuf.ia_size - local->prebuf.ia_size;
- local->delta_blocks = 0;
+ GF_ATOMIC_INIT(local->delta_blocks, 0);
local->hole_size = 0;
shard_update_file_size(frame, this, local->fd, &local->loc,
shard_post_update_size_truncate_handler);
@@ -2065,6 +2209,21 @@ shard_truncate_htol(call_frame_t *frame, xlator_t *this, inode_t *inode)
local->call_count = call_count;
i = 1;
+ xdata_req = dict_new();
+ if (!xdata_req) {
+ shard_common_failure_unwind(local->fop, frame, -1, ENOMEM);
+ return 0;
+ }
+ ret = dict_set_uint64(xdata_req, GF_GET_FILE_BLOCK_COUNT, 8 * 8);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED,
+ "Failed to set key %s into dict during truncate of %s",
+ GF_GET_FILE_BLOCK_COUNT,
+ uuid_utoa(local->resolver_base_inode->gfid));
+ dict_unref(xdata_req);
+ shard_common_failure_unwind(local->fop, frame, -1, ENOMEM);
+ return 0;
+ }
SHARD_SET_ROOT_FS_ID(frame, local);
while (cur_block <= last_block) {
@@ -2103,7 +2262,7 @@ shard_truncate_htol(call_frame_t *frame, xlator_t *this, inode_t *inode)
STACK_WIND_COOKIE(frame, shard_truncate_htol_cbk,
(void *)(long)cur_block, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->unlink, &loc, 0, NULL);
+ FIRST_CHILD(this)->fops->unlink, &loc, 0, xdata_req);
loc_wipe(&loc);
next:
i++;
@@ -2111,6 +2270,7 @@ shard_truncate_htol(call_frame_t *frame, xlator_t *this, inode_t *inode)
if (!--call_count)
break;
}
+ dict_unref(xdata_req);
return 0;
}
@@ -2166,13 +2326,19 @@ shard_link_block_inode(shard_local_t *local, int block_num, inode_t *inode,
xlator_t *this = NULL;
inode_t *fsync_inode = NULL;
shard_priv_t *priv = NULL;
+ inode_t *base_inode = NULL;
this = THIS;
priv = this->private;
- if (local->loc.inode)
+ if (local->loc.inode) {
gf_uuid_copy(gfid, local->loc.inode->gfid);
- else
+ base_inode = local->loc.inode;
+ } else if (local->resolver_base_inode) {
+ gf_uuid_copy(gfid, local->resolver_base_inode->gfid);
+ base_inode = local->resolver_base_inode;
+ } else {
gf_uuid_copy(gfid, local->base_gfid);
+ }
shard_make_block_bname(block_num, gfid, block_bname, sizeof(block_bname));
@@ -2185,7 +2351,7 @@ shard_link_block_inode(shard_local_t *local, int block_num, inode_t *inode,
LOCK(&priv->lock);
{
fsync_inode = __shard_update_shards_inode_list(
- linked_inode, this, local->loc.inode, block_num, gfid);
+ linked_inode, this, base_inode, block_num, gfid);
}
UNLOCK(&priv->lock);
if (fsync_inode)
@@ -2307,7 +2473,7 @@ shard_common_lookup_shards(call_frame_t *frame, xlator_t *this, inode_t *inode,
int count = 0;
int call_count = 0;
int32_t shard_idx_iter = 0;
- int last_block = 0;
+ int lookup_count = 0;
char path[PATH_MAX] = {
0,
};
@@ -2327,7 +2493,7 @@ shard_common_lookup_shards(call_frame_t *frame, xlator_t *this, inode_t *inode,
local = frame->local;
count = call_count = local->call_count;
shard_idx_iter = local->first_block;
- last_block = local->last_block;
+ lookup_count = local->last_block - local->create_count;
local->pls_fop_handler = handler;
if (local->lookup_shards_barriered)
local->barrier.waitfor = local->call_count;
@@ -2337,7 +2503,7 @@ shard_common_lookup_shards(call_frame_t *frame, xlator_t *this, inode_t *inode,
else
gf_uuid_copy(gfid, local->base_gfid);
- while (shard_idx_iter <= last_block) {
+ while (shard_idx_iter <= lookup_count) {
if (local->inode_list[i]) {
i++;
shard_idx_iter++;
@@ -2482,6 +2648,7 @@ shard_truncate_begin(call_frame_t *frame, xlator_t *this)
local->block_size);
local->num_blocks = local->last_block - local->first_block + 1;
+ GF_ASSERT(local->num_blocks > 0);
local->resolver_base_inode = (local->fop == GF_FOP_TRUNCATE)
? local->loc.inode
: local->fd->inode;
@@ -2557,7 +2724,7 @@ shard_post_lookup_truncate_handler(call_frame_t *frame, xlator_t *this)
*/
local->hole_size = local->offset - local->prebuf.ia_size;
local->delta_size = 0;
- local->delta_blocks = 0;
+ GF_ATOMIC_INIT(local->delta_blocks, 0);
local->postbuf.ia_size = local->offset;
tmp_stbuf.ia_size = local->offset;
shard_inode_ctx_set(local->loc.inode, this, &tmp_stbuf, 0,
@@ -2573,7 +2740,7 @@ shard_post_lookup_truncate_handler(call_frame_t *frame, xlator_t *this)
*/
local->hole_size = 0;
local->delta_size = (local->offset - local->prebuf.ia_size);
- local->delta_blocks = 0;
+ GF_ATOMIC_INIT(local->delta_blocks, 0);
tmp_stbuf.ia_size = local->offset;
shard_inode_ctx_set(local->loc.inode, this, &tmp_stbuf, 0,
SHARD_INODE_WRITE_MASK);
@@ -2629,9 +2796,10 @@ shard_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset,
if (!local->xattr_req)
goto err;
local->resolver_base_inode = loc->inode;
+ GF_ATOMIC_INIT(local->delta_blocks, 0);
- shard_lookup_base_file(frame, this, &local->loc,
- shard_post_lookup_truncate_handler);
+ shard_refresh_base_file(frame, this, &local->loc, NULL,
+ shard_post_lookup_truncate_handler);
return 0;
err:
@@ -2684,9 +2852,10 @@ shard_ftruncate(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
local->loc.inode = inode_ref(fd->inode);
gf_uuid_copy(local->loc.gfid, fd->inode->gfid);
local->resolver_base_inode = fd->inode;
+ GF_ATOMIC_INIT(local->delta_blocks, 0);
- shard_lookup_base_file(frame, this, &local->loc,
- shard_post_lookup_truncate_handler);
+ shard_refresh_base_file(frame, this, NULL, fd,
+ shard_post_lookup_truncate_handler);
return 0;
err:
shard_common_failure_unwind(GF_FOP_FTRUNCATE, frame, -1, ENOMEM);
@@ -2830,8 +2999,8 @@ shard_link(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
if (!local->xattr_req)
goto err;
- shard_lookup_base_file(frame, this, &local->loc,
- shard_post_lookup_link_handler);
+ shard_refresh_base_file(frame, this, &local->loc, NULL,
+ shard_post_lookup_link_handler);
return 0;
err:
shard_common_failure_unwind(GF_FOP_LINK, frame, -1, ENOMEM);
@@ -2845,13 +3014,20 @@ int
shard_post_lookup_shards_unlink_handler(call_frame_t *frame, xlator_t *this)
{
shard_local_t *local = NULL;
+ uuid_t gfid = {
+ 0,
+ };
local = frame->local;
+ if (local->resolver_base_inode)
+ gf_uuid_copy(gfid, local->resolver_base_inode->gfid);
+ else
+ gf_uuid_copy(gfid, local->base_gfid);
+
if ((local->op_ret < 0) && (local->op_errno != ENOENT)) {
gf_msg(this->name, GF_LOG_ERROR, local->op_errno, SHARD_MSG_FOP_FAILED,
- "failed to delete shards of %s",
- uuid_utoa(local->resolver_base_inode->gfid));
+ "failed to delete shards of %s", uuid_utoa(gfid));
return 0;
}
local->op_ret = 0;
@@ -2892,8 +3068,8 @@ shard_unlink_block_inode(shard_local_t *local, int shard_block_num)
shard_priv_t *priv = NULL;
shard_inode_ctx_t *ctx = NULL;
shard_inode_ctx_t *base_ictx = NULL;
- gf_boolean_t unlink_unref_forget = _gf_false;
int unref_base_inode = 0;
+ int unref_shard_inode = 0;
this = THIS;
priv = this->private;
@@ -2918,26 +3094,27 @@ shard_unlink_block_inode(shard_local_t *local, int shard_block_num)
list_del_init(&ctx->ilist);
priv->inode_count--;
unref_base_inode++;
+ unref_shard_inode++;
GF_ASSERT(priv->inode_count >= 0);
- unlink_unref_forget = _gf_true;
}
if (ctx->fsync_needed) {
unref_base_inode++;
+ unref_shard_inode++;
list_del_init(&ctx->to_fsync_list);
- if (base_inode)
+ if (base_inode) {
__shard_inode_ctx_get(base_inode, this, &base_ictx);
- if (base_ictx)
base_ictx->fsync_count--;
+ }
}
}
UNLOCK(&inode->lock);
if (base_inode)
UNLOCK(&base_inode->lock);
- if (unlink_unref_forget) {
- inode_unlink(inode, priv->dot_shard_inode, block_bname);
- inode_unref(inode);
- inode_forget(inode, 0);
- }
+
+ inode_unlink(inode, priv->dot_shard_inode, block_bname);
+ inode_ref_reduce_by_n(inode, unref_shard_inode);
+ inode_forget(inode, 0);
+
if (base_inode && unref_base_inode)
inode_ref_reduce_by_n(base_inode, unref_base_inode);
UNLOCK(&priv->lock);
@@ -3339,9 +3516,13 @@ shard_delete_shards_of_entry(call_frame_t *cleanup_frame, xlator_t *this,
loc.inode = inode_ref(priv->dot_shard_rm_inode);
ret = syncop_entrylk(FIRST_CHILD(this), this->name, &loc, entry->d_name,
- ENTRYLK_LOCK, ENTRYLK_WRLCK, NULL, NULL);
- if (ret)
+ ENTRYLK_LOCK_NB, ENTRYLK_WRLCK, NULL, NULL);
+ if (ret < 0) {
+ if (ret == -EAGAIN) {
+ ret = 0;
+ }
goto out;
+ }
{
ret = __shard_delete_shards_of_entry(cleanup_frame, this, entry, inode);
}
@@ -3355,20 +3536,6 @@ out:
int
shard_delete_shards_cbk(int ret, call_frame_t *frame, void *data)
{
- xlator_t *this = NULL;
- shard_priv_t *priv = NULL;
-
- this = frame->this;
- priv = this->private;
-
- if (ret < 0) {
- gf_msg(this->name, GF_LOG_WARNING, -ret,
- SHARD_MSG_SHARDS_DELETION_FAILED,
- "Background deletion of shards failed");
- priv->first_lookup = SHARD_FIRST_LOOKUP_PENDING;
- } else {
- priv->first_lookup = SHARD_FIRST_LOOKUP_DONE;
- }
SHARD_STACK_DESTROY(frame);
return 0;
}
@@ -3490,6 +3657,7 @@ shard_delete_shards(void *opaque)
gf_dirent_t entries;
gf_dirent_t *entry = NULL;
call_frame_t *cleanup_frame = NULL;
+ gf_boolean_t done = _gf_false;
this = THIS;
priv = this->private;
@@ -3506,6 +3674,7 @@ shard_delete_shards(void *opaque)
goto err;
}
cleanup_frame->local = local;
+ local->fop = GF_FOP_UNLINK;
local->xattr_req = dict_new();
if (!local->xattr_req) {
@@ -3543,51 +3712,76 @@ shard_delete_shards(void *opaque)
goto err;
}
- while ((ret = syncop_readdirp(FIRST_CHILD(this), local->fd, 131072, offset,
- &entries, local->xattr_req, NULL))) {
- if (ret > 0)
- ret = 0;
- list_for_each_entry(entry, &entries.list, list)
+ for (;;) {
+ offset = 0;
+ LOCK(&priv->lock);
{
- offset = entry->d_off;
-
- if (!strcmp(entry->d_name, ".") || !strcmp(entry->d_name, ".."))
- continue;
+ if (priv->bg_del_state == SHARD_BG_DELETION_LAUNCHING) {
+ priv->bg_del_state = SHARD_BG_DELETION_IN_PROGRESS;
+ } else if (priv->bg_del_state == SHARD_BG_DELETION_IN_PROGRESS) {
+ priv->bg_del_state = SHARD_BG_DELETION_NONE;
+ done = _gf_true;
+ }
+ }
+ UNLOCK(&priv->lock);
+ if (done)
+ break;
+ while (
+ (ret = syncop_readdirp(FIRST_CHILD(this), local->fd, 131072, offset,
+ &entries, local->xattr_req, NULL))) {
+ if (ret > 0)
+ ret = 0;
+ list_for_each_entry(entry, &entries.list, list)
+ {
+ offset = entry->d_off;
- if (!entry->inode) {
- ret = shard_lookup_marker_entry(this, local, entry);
- if (ret < 0)
+ if (!strcmp(entry->d_name, ".") || !strcmp(entry->d_name, ".."))
continue;
- }
- link_inode = inode_link(entry->inode, local->fd->inode,
- entry->d_name, &entry->d_stat);
- gf_msg_debug(this->name, 0,
- "Initiating deletion of "
- "shards of gfid %s",
- entry->d_name);
- ret = shard_delete_shards_of_entry(cleanup_frame, this, entry,
- link_inode);
- inode_unlink(link_inode, local->fd->inode, entry->d_name);
- inode_unref(link_inode);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, -ret,
- SHARD_MSG_SHARDS_DELETION_FAILED,
- "Failed to clean up shards of gfid %s", entry->d_name);
- continue;
+ if (!entry->inode) {
+ ret = shard_lookup_marker_entry(this, local, entry);
+ if (ret < 0)
+ continue;
+ }
+ link_inode = inode_link(entry->inode, local->fd->inode,
+ entry->d_name, &entry->d_stat);
+
+ gf_msg_debug(this->name, 0,
+ "Initiating deletion of "
+ "shards of gfid %s",
+ entry->d_name);
+ ret = shard_delete_shards_of_entry(cleanup_frame, this, entry,
+ link_inode);
+ inode_unlink(link_inode, local->fd->inode, entry->d_name);
+ inode_unref(link_inode);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, -ret,
+ SHARD_MSG_SHARDS_DELETION_FAILED,
+ "Failed to clean up shards of gfid %s",
+ entry->d_name);
+ continue;
+ }
+ gf_msg(this->name, GF_LOG_INFO, 0,
+ SHARD_MSG_SHARD_DELETION_COMPLETED,
+ "Deleted "
+ "shards of gfid=%s from backend",
+ entry->d_name);
}
- gf_msg(this->name, GF_LOG_INFO, 0,
- SHARD_MSG_SHARD_DELETION_COMPLETED,
- "Deleted "
- "shards of gfid=%s from backend",
- entry->d_name);
+ gf_dirent_free(&entries);
+ if (ret)
+ break;
}
- gf_dirent_free(&entries);
- if (ret)
- break;
}
ret = 0;
+ loc_wipe(&loc);
+ return ret;
+
err:
+ LOCK(&priv->lock);
+ {
+ priv->bg_del_state = SHARD_BG_DELETION_NONE;
+ }
+ UNLOCK(&priv->lock);
loc_wipe(&loc);
return ret;
}
@@ -3925,6 +4119,7 @@ shard_unlink_base_file_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
local->op_ret = op_ret;
local->op_errno = op_errno;
} else {
+ shard_inode_ctx_set_refresh_flag(local->int_inodelk.loc.inode, this);
local->preoldparent = *preparent;
local->postoldparent = *postparent;
if (xdata)
@@ -4134,8 +4329,8 @@ shard_post_inodelk_fop_handler(call_frame_t *frame, xlator_t *this)
switch (local->fop) {
case GF_FOP_UNLINK:
case GF_FOP_RENAME:
- shard_lookup_base_file(frame, this, &local->int_inodelk.loc,
- shard_post_lookup_base_shard_rm_handler);
+ shard_refresh_base_file(frame, this, &local->int_inodelk.loc, NULL,
+ shard_post_lookup_base_shard_rm_handler);
break;
default:
gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_INVALID_FOP,
@@ -4390,8 +4585,8 @@ shard_rename_src_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
if (local->block_size) {
local->tmp_loc.inode = inode_new(this->itable);
gf_uuid_copy(local->tmp_loc.gfid, (local->loc.inode)->gfid);
- shard_lookup_base_file(frame, this, &local->tmp_loc,
- shard_post_rename_lookup_handler);
+ shard_refresh_base_file(frame, this, &local->tmp_loc, NULL,
+ shard_post_rename_lookup_handler);
} else {
shard_rename_cbk(frame, this);
}
@@ -4648,6 +4843,8 @@ out:
if (xdata)
local->xattr_rsp = dict_ref(xdata);
vec.iov_base = local->iobuf->ptr;
+ if (local->offset + local->req_size > local->prebuf.ia_size)
+ local->total_size = local->prebuf.ia_size - local->offset;
vec.iov_len = local->total_size;
local->op_ret = local->total_size;
SHARD_STACK_UNWIND(readv, frame, local->op_ret, local->op_errno,
@@ -5028,6 +5225,7 @@ shard_post_lookup_readv_handler(call_frame_t *frame, xlator_t *this)
local->block_size);
local->num_blocks = local->last_block - local->first_block + 1;
+ GF_ASSERT(local->num_blocks > 0);
local->resolver_base_inode = local->loc.inode;
local->inode_list = GF_CALLOC(local->num_blocks, sizeof(inode_t *),
@@ -5124,8 +5322,8 @@ shard_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
local->loc.inode = inode_ref(fd->inode);
gf_uuid_copy(local->loc.gfid, fd->inode->gfid);
- shard_lookup_base_file(frame, this, &local->loc,
- shard_post_lookup_readv_handler);
+ shard_refresh_base_file(frame, this, NULL, fd,
+ shard_post_lookup_readv_handler);
return 0;
err:
shard_common_failure_unwind(GF_FOP_READ, frame, -1, ENOMEM);
@@ -5226,7 +5424,8 @@ shard_common_inode_write_do_cbk(call_frame_t *frame, void *cookie,
local->op_errno = op_errno;
} else {
local->written_size += op_ret;
- local->delta_blocks += (post->ia_blocks - pre->ia_blocks);
+ GF_ATOMIC_ADD(local->delta_blocks,
+ post->ia_blocks - pre->ia_blocks);
local->delta_size += (post->ia_size - pre->ia_size);
shard_inode_ctx_set(local->fd->inode, this, post, 0,
SHARD_MASK_TIMES);
@@ -5365,21 +5564,17 @@ shard_common_inode_write_do(call_frame_t *frame, xlator_t *this)
remaining_size -= shard_write_size;
if (local->fop == GF_FOP_WRITE) {
+ vec = NULL;
count = iov_subset(local->vector, local->count, vec_offset,
- vec_offset + shard_write_size, NULL);
-
- vec = GF_CALLOC(count, sizeof(struct iovec), gf_shard_mt_iovec);
- if (!vec) {
+ shard_write_size, &vec, 0);
+ if (count < 0) {
local->op_ret = -1;
local->op_errno = ENOMEM;
wind_failed = _gf_true;
- GF_FREE(vec);
shard_common_inode_write_do_cbk(frame, (void *)(long)0, this,
-1, ENOMEM, NULL, NULL, NULL);
goto next;
}
- count = iov_subset(local->vector, local->count, vec_offset,
- vec_offset + shard_write_size, vec);
}
if (cur_block == 0) {
@@ -5491,6 +5686,8 @@ shard_common_inode_write_post_resolve_handler(call_frame_t *frame,
shard_common_lookup_shards(
frame, this, local->resolver_base_inode,
shard_common_inode_write_post_lookup_shards_handler);
+ } else if (local->create_count) {
+ shard_common_inode_write_post_lookup_shards_handler(frame, this);
} else {
shard_common_inode_write_do(frame, this);
}
@@ -5521,6 +5718,7 @@ shard_common_inode_write_post_lookup_handler(call_frame_t *frame,
local->last_block = get_highest_block(local->offset, local->total_size,
local->block_size);
local->num_blocks = local->last_block - local->first_block + 1;
+ GF_ASSERT(local->num_blocks > 0);
local->inode_list = GF_CALLOC(local->num_blocks, sizeof(inode_t *),
gf_shard_mt_inode_list);
if (!local->inode_list) {
@@ -5529,9 +5727,9 @@ shard_common_inode_write_post_lookup_handler(call_frame_t *frame,
}
gf_msg_trace(this->name, 0,
- "%s: gfid=%s first_block=%" PRIu32
+ "%s: gfid=%s first_block=%" PRIu64
" "
- "last_block=%" PRIu32 " num_blocks=%" PRIu32 " offset=%" PRId64
+ "last_block=%" PRIu64 " num_blocks=%" PRIu64 " offset=%" PRId64
" total_size=%zu flags=%" PRId32 "",
gf_fop_list[local->fop],
uuid_utoa(local->resolver_base_inode->gfid),
@@ -5736,6 +5934,7 @@ shard_fsync_shards_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
shard_inode_ctx_t *ctx = NULL;
shard_inode_ctx_t *base_ictx = NULL;
inode_t *base_inode = NULL;
+ gf_boolean_t unref_shard_inode = _gf_false;
local = frame->local;
base_inode = local->fd->inode;
@@ -5769,11 +5968,16 @@ out:
if (ctx->fsync_needed != 0) {
list_add_tail(&ctx->to_fsync_list, &base_ictx->to_fsync_list);
base_ictx->fsync_count++;
+ } else {
+ unref_shard_inode = _gf_true;
}
}
UNLOCK(&anon_fd->inode->lock);
UNLOCK(&base_inode->lock);
}
+
+ if (unref_shard_inode)
+ inode_unref(anon_fd->inode);
if (anon_fd)
fd_unref(anon_fd);
@@ -5920,8 +6124,8 @@ shard_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync,
local->loc.inode = inode_ref(fd->inode);
gf_uuid_copy(local->loc.gfid, fd->inode->gfid);
- shard_lookup_base_file(frame, this, &local->loc,
- shard_post_lookup_fsync_handler);
+ shard_refresh_base_file(frame, this, NULL, fd,
+ shard_post_lookup_fsync_handler);
return 0;
err:
shard_common_failure_unwind(GF_FOP_FSYNC, frame, -1, ENOMEM);
@@ -6113,48 +6317,210 @@ shard_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
}
int32_t
-shard_removexattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
- const char *name, dict_t *xdata)
+shard_modify_and_set_iatt_in_dict(dict_t *xdata, shard_local_t *local,
+ char *key)
{
- int op_errno = EINVAL;
+ int ret = 0;
+ struct iatt *tmpbuf = NULL;
+ struct iatt *stbuf = NULL;
+ data_t *data = NULL;
- if (frame->root->pid != GF_CLIENT_PID_GSYNCD) {
- GF_IF_NATIVE_XATTR_GOTO(SHARD_XATTR_PREFIX "*", name, op_errno, out);
+ if (!xdata)
+ return 0;
+
+ data = dict_get(xdata, key);
+ if (!data)
+ return 0;
+
+ tmpbuf = data_to_iatt(data, key);
+ stbuf = GF_MALLOC(sizeof(struct iatt), gf_common_mt_char);
+ if (stbuf == NULL) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ goto err;
}
+ *stbuf = *tmpbuf;
+ stbuf->ia_size = local->prebuf.ia_size;
+ stbuf->ia_blocks = local->prebuf.ia_blocks;
+ ret = dict_set_iatt(xdata, key, stbuf, false);
+ if (ret < 0) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ goto err;
+ }
+ return 0;
- if (xdata && (frame->root->pid != GF_CLIENT_PID_GSYNCD)) {
- dict_del(xdata, GF_XATTR_SHARD_BLOCK_SIZE);
- dict_del(xdata, GF_XATTR_SHARD_FILE_SIZE);
+err:
+ GF_FREE(stbuf);
+ return -1;
+}
+
+int32_t
+shard_common_remove_xattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ int ret = -1;
+ shard_local_t *local = NULL;
+
+ local = frame->local;
+
+ if (op_ret < 0) {
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+ goto err;
}
- STACK_WIND_TAIL(frame, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->removexattr, loc, name, xdata);
+ ret = shard_modify_and_set_iatt_in_dict(xdata, local, GF_PRESTAT);
+ if (ret < 0)
+ goto err;
+
+ ret = shard_modify_and_set_iatt_in_dict(xdata, local, GF_POSTSTAT);
+ if (ret < 0)
+ goto err;
+
+ if (local->fd)
+ SHARD_STACK_UNWIND(fremovexattr, frame, local->op_ret, local->op_errno,
+ xdata);
+ else
+ SHARD_STACK_UNWIND(removexattr, frame, local->op_ret, local->op_errno,
+ xdata);
return 0;
-out:
- shard_common_failure_unwind(GF_FOP_REMOVEXATTR, frame, -1, op_errno);
+
+err:
+ shard_common_failure_unwind(local->fop, frame, local->op_ret,
+ local->op_errno);
return 0;
}
int32_t
-shard_fremovexattr(call_frame_t *frame, xlator_t *this, fd_t *fd,
- const char *name, dict_t *xdata)
+shard_post_lookup_remove_xattr_handler(call_frame_t *frame, xlator_t *this)
{
- int op_errno = EINVAL;
+ shard_local_t *local = NULL;
+
+ local = frame->local;
+
+ if (local->op_ret < 0) {
+ shard_common_failure_unwind(local->fop, frame, local->op_ret,
+ local->op_errno);
+ return 0;
+ }
+ if (local->fd)
+ STACK_WIND(frame, shard_common_remove_xattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fremovexattr, local->fd,
+ local->name, local->xattr_req);
+ else
+ STACK_WIND(frame, shard_common_remove_xattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->removexattr, &local->loc,
+ local->name, local->xattr_req);
+ return 0;
+}
+
+int32_t
+shard_common_remove_xattr(call_frame_t *frame, xlator_t *this,
+ glusterfs_fop_t fop, loc_t *loc, fd_t *fd,
+ const char *name, dict_t *xdata)
+{
+ int ret = -1;
+ int op_errno = ENOMEM;
+ uint64_t block_size = 0;
+ shard_local_t *local = NULL;
+ inode_t *inode = loc ? loc->inode : fd->inode;
+
+ if ((IA_ISDIR(inode->ia_type)) || (IA_ISLNK(inode->ia_type))) {
+ if (loc)
+ STACK_WIND_TAIL(frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->removexattr, loc, name,
+ xdata);
+ else
+ STACK_WIND_TAIL(frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fremovexattr, fd, name,
+ xdata);
+ return 0;
+ }
+
+ /* If shard's special xattrs are attempted to be removed,
+ * fail the fop with EPERM (except if the client is gsyncd).
+ */
if (frame->root->pid != GF_CLIENT_PID_GSYNCD) {
- GF_IF_NATIVE_XATTR_GOTO(SHARD_XATTR_PREFIX "*", name, op_errno, out);
+ GF_IF_NATIVE_XATTR_GOTO(SHARD_XATTR_PREFIX "*", name, op_errno, err);
}
+ /* Repeat the same check for bulk-removexattr */
if (xdata && (frame->root->pid != GF_CLIENT_PID_GSYNCD)) {
dict_del(xdata, GF_XATTR_SHARD_BLOCK_SIZE);
dict_del(xdata, GF_XATTR_SHARD_FILE_SIZE);
}
- STACK_WIND_TAIL(frame, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fremovexattr, fd, name, xdata);
+ ret = shard_inode_ctx_get_block_size(inode, this, &block_size);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED,
+ "Failed to get block size from inode ctx of %s",
+ uuid_utoa(inode->gfid));
+ goto err;
+ }
+
+ if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) {
+ if (loc)
+ STACK_WIND_TAIL(frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->removexattr, loc, name,
+ xdata);
+ else
+ STACK_WIND_TAIL(frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fremovexattr, fd, name,
+ xdata);
+ return 0;
+ }
+
+ local = mem_get0(this->local_pool);
+ if (!local)
+ goto err;
+
+ frame->local = local;
+ local->fop = fop;
+ if (loc) {
+ if (loc_copy(&local->loc, loc) != 0)
+ goto err;
+ }
+
+ if (fd) {
+ local->fd = fd_ref(fd);
+ local->loc.inode = inode_ref(fd->inode);
+ gf_uuid_copy(local->loc.gfid, fd->inode->gfid);
+ }
+
+ if (name) {
+ local->name = gf_strdup(name);
+ if (!local->name)
+ goto err;
+ }
+
+ if (xdata)
+ local->xattr_req = dict_ref(xdata);
+
+ shard_refresh_base_file(frame, this, loc, fd,
+ shard_post_lookup_remove_xattr_handler);
return 0;
-out:
- shard_common_failure_unwind(GF_FOP_FREMOVEXATTR, frame, -1, op_errno);
+err:
+ shard_common_failure_unwind(fop, frame, -1, op_errno);
+ return 0;
+}
+
+int32_t
+shard_removexattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ const char *name, dict_t *xdata)
+{
+ shard_common_remove_xattr(frame, this, GF_FOP_REMOVEXATTR, loc, NULL, name,
+ xdata);
+ return 0;
+}
+
+int32_t
+shard_fremovexattr(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ const char *name, dict_t *xdata)
+{
+ shard_common_remove_xattr(frame, this, GF_FOP_FREMOVEXATTR, NULL, fd, name,
+ xdata);
return 0;
}
@@ -6235,38 +6601,164 @@ out:
}
int32_t
-shard_fsetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict,
- int32_t flags, dict_t *xdata)
+shard_common_set_xattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- int op_errno = EINVAL;
+ int ret = -1;
+ shard_local_t *local = NULL;
- if (frame->root->pid != GF_CLIENT_PID_GSYNCD) {
- GF_IF_INTERNAL_XATTR_GOTO(SHARD_XATTR_PREFIX "*", dict, op_errno, out);
+ local = frame->local;
+
+ if (op_ret < 0) {
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+ goto err;
}
- STACK_WIND_TAIL(frame, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fsetxattr, fd, dict, flags, xdata);
+ ret = shard_modify_and_set_iatt_in_dict(xdata, local, GF_PRESTAT);
+ if (ret < 0)
+ goto err;
+
+ ret = shard_modify_and_set_iatt_in_dict(xdata, local, GF_POSTSTAT);
+ if (ret < 0)
+ goto err;
+
+ if (local->fd)
+ SHARD_STACK_UNWIND(fsetxattr, frame, local->op_ret, local->op_errno,
+ xdata);
+ else
+ SHARD_STACK_UNWIND(setxattr, frame, local->op_ret, local->op_errno,
+ xdata);
return 0;
-out:
- shard_common_failure_unwind(GF_FOP_FSETXATTR, frame, -1, op_errno);
+
+err:
+ shard_common_failure_unwind(local->fop, frame, local->op_ret,
+ local->op_errno);
return 0;
}
int32_t
-shard_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict,
- int32_t flags, dict_t *xdata)
+shard_post_lookup_set_xattr_handler(call_frame_t *frame, xlator_t *this)
{
- int op_errno = EINVAL;
+ shard_local_t *local = NULL;
+
+ local = frame->local;
+
+ if (local->op_ret < 0) {
+ shard_common_failure_unwind(local->fop, frame, local->op_ret,
+ local->op_errno);
+ return 0;
+ }
+
+ if (local->fd)
+ STACK_WIND(frame, shard_common_set_xattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fsetxattr, local->fd,
+ local->xattr_req, local->flags, local->xattr_rsp);
+ else
+ STACK_WIND(frame, shard_common_set_xattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->setxattr, &local->loc,
+ local->xattr_req, local->flags, local->xattr_rsp);
+ return 0;
+}
+
+int32_t
+shard_common_set_xattr(call_frame_t *frame, xlator_t *this, glusterfs_fop_t fop,
+ loc_t *loc, fd_t *fd, dict_t *dict, int32_t flags,
+ dict_t *xdata)
+{
+ int ret = -1;
+ int op_errno = ENOMEM;
+ uint64_t block_size = 0;
+ shard_local_t *local = NULL;
+ inode_t *inode = loc ? loc->inode : fd->inode;
+
+ if ((IA_ISDIR(inode->ia_type)) || (IA_ISLNK(inode->ia_type))) {
+ if (loc)
+ STACK_WIND_TAIL(frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->setxattr, loc, dict, flags,
+ xdata);
+ else
+ STACK_WIND_TAIL(frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fsetxattr, fd, dict, flags,
+ xdata);
+ return 0;
+ }
+ /* Sharded or not, if shard's special xattrs are attempted to be set,
+ * fail the fop with EPERM (except if the client is gsyncd).
+ */
if (frame->root->pid != GF_CLIENT_PID_GSYNCD) {
- GF_IF_INTERNAL_XATTR_GOTO(SHARD_XATTR_PREFIX "*", dict, op_errno, out);
+ GF_IF_INTERNAL_XATTR_GOTO(SHARD_XATTR_PREFIX "*", dict, op_errno, err);
+ }
+
+ ret = shard_inode_ctx_get_block_size(inode, this, &block_size);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED,
+ "Failed to get block size from inode ctx of %s",
+ uuid_utoa(inode->gfid));
+ goto err;
+ }
+
+ if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) {
+ if (loc)
+ STACK_WIND_TAIL(frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->setxattr, loc, dict, flags,
+ xdata);
+ else
+ STACK_WIND_TAIL(frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fsetxattr, fd, dict, flags,
+ xdata);
+ return 0;
}
- STACK_WIND_TAIL(frame, FIRST_CHILD(this), FIRST_CHILD(this)->fops->setxattr,
- loc, dict, flags, xdata);
+ local = mem_get0(this->local_pool);
+ if (!local)
+ goto err;
+
+ frame->local = local;
+ local->fop = fop;
+ if (loc) {
+ if (loc_copy(&local->loc, loc) != 0)
+ goto err;
+ }
+
+ if (fd) {
+ local->fd = fd_ref(fd);
+ local->loc.inode = inode_ref(fd->inode);
+ gf_uuid_copy(local->loc.gfid, fd->inode->gfid);
+ }
+ local->flags = flags;
+ /* Reusing local->xattr_req and local->xattr_rsp to store the setxattr dict
+ * and the xdata dict
+ */
+ if (dict)
+ local->xattr_req = dict_ref(dict);
+ if (xdata)
+ local->xattr_rsp = dict_ref(xdata);
+
+ shard_refresh_base_file(frame, this, loc, fd,
+ shard_post_lookup_set_xattr_handler);
return 0;
-out:
- shard_common_failure_unwind(GF_FOP_SETXATTR, frame, -1, op_errno);
+err:
+ shard_common_failure_unwind(fop, frame, -1, op_errno);
+ return 0;
+}
+
+int32_t
+shard_fsetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict,
+ int32_t flags, dict_t *xdata)
+{
+ shard_common_set_xattr(frame, this, GF_FOP_FSETXATTR, NULL, fd, dict, flags,
+ xdata);
+ return 0;
+}
+
+int32_t
+shard_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict,
+ int32_t flags, dict_t *xdata)
+{
+ shard_common_set_xattr(frame, this, GF_FOP_SETXATTR, loc, NULL, dict, flags,
+ xdata);
return 0;
}
@@ -6524,12 +7016,13 @@ shard_common_inode_write_begin(call_frame_t *frame, xlator_t *this,
local->fd = fd_ref(fd);
local->block_size = block_size;
local->resolver_base_inode = local->fd->inode;
+ GF_ATOMIC_INIT(local->delta_blocks, 0);
local->loc.inode = inode_ref(fd->inode);
gf_uuid_copy(local->loc.gfid, fd->inode->gfid);
- shard_lookup_base_file(frame, this, &local->loc,
- shard_common_inode_write_post_lookup_handler);
+ shard_refresh_base_file(frame, this, NULL, fd,
+ shard_common_inode_write_post_lookup_handler);
return 0;
out:
shard_common_failure_unwind(fop, frame, -1, ENOMEM);
@@ -6674,6 +7167,9 @@ fini(xlator_t *this)
GF_VALIDATE_OR_GOTO("shard", this, out);
+ /* Itable was not created by shard, hence setting it to NULL. */
+ this->itable = NULL;
+
mem_pool_destroy(this->local_pool);
this->local_pool = NULL;
@@ -6821,6 +7317,14 @@ struct xlator_dumpops dumpops = {
struct volume_options options[] = {
{
+ .key = {"shard"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ .description = "enable/disable shard",
+ .op_version = {GD_OP_VERSION_6_0},
+ .flags = OPT_FLAG_SETTABLE,
+ },
+ {
.key = {"shard-block-size"},
.type = GF_OPTION_TYPE_SIZET,
.op_version = {GD_OP_VERSION_3_7_0},
@@ -6862,3 +7366,17 @@ struct volume_options options[] = {
},
{.key = {NULL}},
};
+
+xlator_api_t xlator_api = {
+ .init = init,
+ .fini = fini,
+ .reconfigure = reconfigure,
+ .mem_acct_init = mem_acct_init,
+ .op_version = {1}, /* Present from the initial version */
+ .dumpops = &dumpops,
+ .fops = &fops,
+ .cbks = &cbks,
+ .options = options,
+ .identifier = "shard",
+ .category = GF_MAINTAINED,
+};
diff --git a/xlators/features/shard/src/shard.h b/xlators/features/shard/src/shard.h
index f877591faee..4fe181b64d5 100644
--- a/xlators/features/shard/src/shard.h
+++ b/xlators/features/shard/src/shard.h
@@ -11,10 +11,10 @@
#ifndef __SHARD_H__
#define __SHARD_H__
-#include "xlator.h"
-#include "compat-errno.h"
+#include <glusterfs/xlator.h>
+#include <glusterfs/compat-errno.h>
#include "shard-messages.h"
-#include "syncop.h"
+#include <glusterfs/syncop.h>
#define GF_SHARD_DIR ".shard"
#define GF_SHARD_REMOVE_ME_DIR ".remove_me"
@@ -200,10 +200,10 @@ shard_unlock_entrylk(call_frame_t *frame, xlator_t *this);
} while (0)
typedef enum {
- SHARD_FIRST_LOOKUP_PENDING = 0,
- SHARD_FIRST_LOOKUP_IN_PROGRESS,
- SHARD_FIRST_LOOKUP_DONE,
-} shard_first_lookup_state_t;
+ SHARD_BG_DELETION_NONE = 0,
+ SHARD_BG_DELETION_LAUNCHING,
+ SHARD_BG_DELETION_IN_PROGRESS,
+} shard_bg_deletion_state_t;
/* rm = "remove me" */
@@ -217,7 +217,8 @@ typedef struct shard_priv {
int inode_count;
struct list_head ilist_head;
uint32_t deletion_rate;
- shard_first_lookup_state_t first_lookup;
+ shard_bg_deletion_state_t bg_del_state;
+ gf_boolean_t first_lookup_done;
uint64_t lru_limit;
} shard_priv_t;
@@ -253,9 +254,9 @@ typedef int32_t (*shard_post_update_size_fop_handler_t)(call_frame_t *frame,
typedef struct shard_local {
int op_ret;
int op_errno;
- int first_block;
- int last_block;
- int num_blocks;
+ uint64_t first_block;
+ uint64_t last_block;
+ uint64_t num_blocks;
int call_count;
int eexist_count;
int create_count;
@@ -274,7 +275,7 @@ typedef struct shard_local {
size_t req_size;
size_t readdir_size;
int64_t delta_size;
- int delta_blocks;
+ gf_atomic_t delta_blocks;
loc_t loc;
loc_t dot_shard_loc;
loc_t dot_shard_rm_loc;
@@ -317,6 +318,7 @@ typedef struct shard_local {
uint32_t deletion_rate;
gf_boolean_t cleanup_required;
uuid_t base_gfid;
+ char *name;
} shard_local_t;
typedef struct shard_inode_ctx {
diff --git a/xlators/features/snapview-client/src/snapview-client-mem-types.h b/xlators/features/snapview-client/src/snapview-client-mem-types.h
index aac0d571c41..3c3ab555a55 100644
--- a/xlators/features/snapview-client/src/snapview-client-mem-types.h
+++ b/xlators/features/snapview-client/src/snapview-client-mem-types.h
@@ -11,7 +11,7 @@
#ifndef _SVC_MEM_TYPES_H
#define _SVC_MEM_TYPES_H
-#include "mem-types.h"
+#include <glusterfs/mem-types.h>
enum svc_mem_types {
gf_svc_mt_svc_private_t = gf_common_mt_end + 1,
diff --git a/xlators/features/snapview-client/src/snapview-client-messages.h b/xlators/features/snapview-client/src/snapview-client-messages.h
index b99578a6063..c02fb154930 100644
--- a/xlators/features/snapview-client/src/snapview-client-messages.h
+++ b/xlators/features/snapview-client/src/snapview-client-messages.h
@@ -11,7 +11,7 @@
#ifndef _SNAPVIEW_CLIENT_MESSAGES_H_
#define _SNAPVIEW_CLIENT_MESSAGES_H_
-#include "glfs-message-id.h"
+#include <glusterfs/glfs-message-id.h>
/* To add new message IDs, append new identifiers at the end of the list.
*
@@ -31,6 +31,41 @@ GLFS_MSGID(SNAPVIEW_CLIENT, SVC_MSG_NO_MEMORY, SVC_MSG_MEM_ACNT_FAILED,
SVC_MSG_XLATOR_CHILDREN_WRONG, SVC_MSG_NORMAL_GRAPH_LOOKUP_FAIL,
SVC_MSG_SNAPVIEW_GRAPH_LOOKUP_FAIL, SVC_MSG_OPENDIR_SPECIAL_DIR,
SVC_MSG_RENAME_SNAPSHOT_ENTRY, SVC_MSG_LINK_SNAPSHOT_ENTRY,
- SVC_MSG_ENTRY_POINT_SPECIAL_DIR);
+ SVC_MSG_COPY_ENTRY_POINT_FAILED, SVC_MSG_ENTRY_POINT_SPECIAL_DIR,
+ SVC_MSG_STR_LEN, SVC_MSG_INVALID_ENTRY_POINT, SVC_MSG_NULL_PRIV,
+ SVC_MSG_PRIV_DESTROY_FAILED, SVC_MSG_ALLOC_FD_FAILED,
+ SVC_MSG_ALLOC_INODE_FAILED, SVC_MSG_NULL_SPECIAL_DIR,
+ SVC_MSG_MEM_POOL_GET_FAILED);
+#define SVC_MSG_ALLOC_FD_FAILED_STR "failed to allocate new fd context"
+#define SVC_MSG_SET_FD_CONTEXT_FAILED_STR "failed to set fd context"
+#define SVC_MSG_STR_LEN_STR \
+ "destination buffer size is less than the length of entry point name"
+#define SVC_MSG_NORMAL_GRAPH_LOOKUP_FAIL_STR "lookup failed on normal graph"
+#define SVC_MSG_SNAPVIEW_GRAPH_LOOKUP_FAIL_STR "lookup failed on snapview graph"
+#define SVC_MSG_SET_INODE_CONTEXT_FAILED_STR "failed to set inode context"
+#define SVC_MSG_NO_MEMORY_STR "failed to allocate memory"
+#define SVC_MSG_COPY_ENTRY_POINT_FAILED_STR \
+ "failed to copy the entry point string"
+#define SVC_MSG_GET_FD_CONTEXT_FAILED_STR "fd context not found"
+#define SVC_MSG_GET_INODE_CONTEXT_FAILED_STR "failed to get inode context"
+#define SVC_MSG_ALLOC_INODE_FAILED_STR "failed to allocate new inode"
+#define SVC_MSG_DICT_SET_FAILED_STR "failed to set dict"
+#define SVC_MSG_RENAME_SNAPSHOT_ENTRY_STR \
+ "rename happening on a entry residing in snapshot"
+#define SVC_MSG_DELETE_INODE_CONTEXT_FAILED_STR "failed to delete inode context"
+#define SVC_MSG_NULL_PRIV_STR "priv NULL"
+#define SVC_MSG_INVALID_ENTRY_POINT_STR "not a valid entry point"
+#define SVC_MSG_MEM_ACNT_FAILED_STR "Memory accounting init failed"
+#define SVC_MSG_NO_CHILD_FOR_XLATOR_STR "configured without any child"
+#define SVC_MSG_XLATOR_CHILDREN_WRONG_STR \
+ "snap-view-client has got wrong subvolumes. It can have only 2"
+#define SVC_MSG_ENTRY_POINT_SPECIAL_DIR_STR \
+ "entry point directory cannot be part of special directory"
+#define SVC_MSG_NULL_SPECIAL_DIR_STR "null special directory"
+#define SVC_MSG_MEM_POOL_GET_FAILED_STR \
+ "could not get mem pool for frame->local"
+#define SVC_MSG_PRIV_DESTROY_FAILED_STR "failed to destroy private"
+#define SVC_MSG_LINK_SNAPSHOT_ENTRY_STR \
+ "link happening on a entry residing in snapshot"
#endif /* !_SNAPVIEW_CLIENT_MESSAGES_H_ */
diff --git a/xlators/features/snapview-client/src/snapview-client.c b/xlators/features/snapview-client/src/snapview-client.c
index c991f058dbd..486c5179d5b 100644
--- a/xlators/features/snapview-client/src/snapview-client.c
+++ b/xlators/features/snapview-client/src/snapview-client.c
@@ -9,8 +9,8 @@
*/
#include "snapview-client.h"
-#include "inode.h"
-#include "byte-order.h"
+#include <glusterfs/inode.h>
+#include <glusterfs/byte-order.h>
static void
svc_local_free(svc_local_t *local)
@@ -198,16 +198,15 @@ __svc_fd_ctx_get_or_new(xlator_t *this, fd_t *fd)
svc_fd = svc_fd_new();
if (!svc_fd) {
- gf_msg(this->name, GF_LOG_ERROR, ENOMEM, SVC_MSG_NO_MEMORY,
- "failed to allocate new fd context for gfid %s",
- uuid_utoa(inode->gfid));
+ gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, SVC_MSG_ALLOC_FD_FAILED,
+ "gfid=%s", uuid_utoa(inode->gfid), NULL);
goto out;
}
ret = __svc_fd_ctx_set(this, fd, svc_fd);
if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, SVC_MSG_SET_FD_CONTEXT_FAILED,
- "failed to set fd context for gfid %s", uuid_utoa(inode->gfid));
+ gf_smsg(this->name, GF_LOG_ERROR, 0, SVC_MSG_SET_FD_CONTEXT_FAILED,
+ "gfid=%s", uuid_utoa(inode->gfid), NULL);
ret = -1;
}
@@ -238,6 +237,50 @@ out:
return svc_fd;
}
+/**
+ * @this: xlator
+ * @entry_point: pointer to the buffer provided by consumer
+ * @dest_size: size in bytes of the buffer pointed to by @entry_point
+ *
+ * Copies the entry point name (stored as a string in priv->path)
+ * into @entry_point while holding priv->lock. The consumer allocates
+ * the buffer. Returns 0 once the name has been copied and -1
+ * otherwise: when a GF_VALIDATE_OR_GOTO check on @this or
+ * @entry_point jumps to out, or when @dest_size <= strlen(priv->path).
+ *
+ * Fops that need priv->path call this instead of reading it
+ * directly. For example, while a lookup or any other fop is
+ * accessing priv->path, a reconfigure can happen to change it;
+ * copying the value out under the lock avoids that race.
+ **/
+int
+gf_svc_get_entry_point(xlator_t *this, char *entry_point, size_t dest_size)
+{
+ int ret = -1; /* stays -1 unless the copy below succeeds */
+ svc_private_t *priv = NULL;
+
+ GF_VALIDATE_OR_GOTO("snapview-client", this, out);
+ GF_VALIDATE_OR_GOTO(this->name, entry_point, out);
+
+ priv = this->private; /* NOTE(review): dereferenced below without a NULL check - confirm callers guarantee it is set */
+
+ LOCK(&priv->lock);
+ {
+ if (dest_size <= strlen(priv->path)) { /* no room for the name plus its terminating NUL: log and leave ret at -1 */
+ gf_smsg(this->name, GF_LOG_ERROR, 0, SVC_MSG_STR_LEN,
+ "dest-size=%zu", dest_size, "priv-path-len=%zu",
+ strlen(priv->path), "path=%s", priv->path, NULL);
+ } else {
+ snprintf(entry_point, dest_size, "%s", priv->path);
+ ret = 0;
+ }
+ }
+ UNLOCK(&priv->lock);
+
+out:
+ return ret;
+}
+
static int32_t
gf_svc_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, inode_t *inode,
@@ -275,19 +318,17 @@ gf_svc_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
*/
if (op_ret) {
if (subvolume == FIRST_CHILD(this)) {
- gf_msg(this->name,
- (op_errno == ENOENT || op_errno == ESTALE) ? GF_LOG_DEBUG
- : GF_LOG_ERROR,
- op_errno, SVC_MSG_NORMAL_GRAPH_LOOKUP_FAIL,
- "lookup failed on normal graph with error %s",
- strerror(op_errno));
+ gf_smsg(this->name,
+ (op_errno == ENOENT || op_errno == ESTALE) ? GF_LOG_DEBUG
+ : GF_LOG_ERROR,
+ op_errno, SVC_MSG_NORMAL_GRAPH_LOOKUP_FAIL, "error=%s",
+ strerror(op_errno), NULL);
} else {
- gf_msg(this->name,
- (op_errno == ENOENT || op_errno == ESTALE) ? GF_LOG_DEBUG
- : GF_LOG_ERROR,
- op_errno, SVC_MSG_SNAPVIEW_GRAPH_LOOKUP_FAIL,
- "lookup failed on snapview graph with error %s",
- strerror(op_errno));
+ gf_smsg(this->name,
+ (op_errno == ENOENT || op_errno == ESTALE) ? GF_LOG_DEBUG
+ : GF_LOG_ERROR,
+ op_errno, SVC_MSG_SNAPVIEW_GRAPH_LOOKUP_FAIL, "error=%s",
+ strerror(op_errno), NULL);
goto out;
}
@@ -318,10 +359,8 @@ gf_svc_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
ret = svc_inode_ctx_set(this, inode, inode_type);
if (ret)
- gf_msg(this->name, GF_LOG_ERROR, 0, SVC_MSG_SET_INODE_CONTEXT_FAILED,
- "failed to set inode type in the inode context "
- "(gfid: %s)",
- uuid_utoa(inode->gfid));
+ gf_smsg(this->name, GF_LOG_ERROR, 0, SVC_MSG_SET_INODE_CONTEXT_FAILED,
+ "gfid=%s", uuid_utoa(inode->gfid), NULL);
out:
if (do_unwind) {
@@ -341,20 +380,19 @@ gf_svc_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
int op_ret = -1;
int op_errno = EINVAL;
inode_t *parent = NULL;
- svc_private_t *priv = NULL;
dict_t *new_xdata = NULL;
int inode_type = -1;
int parent_type = -1;
gf_boolean_t wind = _gf_false;
+ char entry_point[NAME_MAX + 1] = {
+ 0,
+ };
GF_VALIDATE_OR_GOTO("svc", this, out);
- GF_VALIDATE_OR_GOTO(this->name, this->private, out);
GF_VALIDATE_OR_GOTO(this->name, frame, out);
GF_VALIDATE_OR_GOTO(this->name, loc, out);
GF_VALIDATE_OR_GOTO(this->name, loc->inode, out);
- priv = this->private;
-
ret = svc_inode_ctx_get(this, loc->inode, &inode_type);
if (!__is_root_gfid(loc->gfid)) {
if (loc->parent) {
@@ -371,8 +409,7 @@ gf_svc_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
if (!local) {
op_ret = -1;
op_errno = ENOMEM;
- gf_msg(this->name, GF_LOG_ERROR, op_errno, SVC_MSG_NO_MEMORY,
- "failed to allocate local");
+ gf_smsg(this->name, GF_LOG_ERROR, op_errno, SVC_MSG_NO_MEMORY, NULL);
goto out;
}
@@ -411,7 +448,13 @@ gf_svc_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
}
}
- if (strcmp(loc->name, priv->path)) {
+ if (gf_svc_get_entry_point(this, entry_point, sizeof(entry_point))) {
+ gf_smsg(this->name, GF_LOG_WARNING, op_errno,
+ SVC_MSG_COPY_ENTRY_POINT_FAILED, NULL);
+ goto out;
+ }
+
+ if (strcmp(loc->name, entry_point)) {
if (parent_type == VIRTUAL_INODE) {
subvolume = SECOND_CHILD(this);
} else {
@@ -429,8 +472,8 @@ gf_svc_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
/* Indication of whether the lookup is happening on the
entry point or not, to the snapview-server.
*/
- SVC_ENTRY_POINT_SET(this, xdata, op_ret, op_errno, new_xdata, priv,
- ret, out);
+ SVC_ENTRY_POINT_SET(this, xdata, op_ret, op_errno, new_xdata, ret,
+ out);
}
}
@@ -469,6 +512,9 @@ gf_svc_statfs(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
0,
};
loc_t *temp_loc = NULL;
+ char entry_point[NAME_MAX + 1] = {
+ 0,
+ };
GF_VALIDATE_OR_GOTO("svc", this, out);
GF_VALIDATE_OR_GOTO(this->name, frame, out);
@@ -484,7 +530,13 @@ gf_svc_statfs(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
if (path_len >= snap_len && inode_type == VIRTUAL_INODE) {
path = &loc->path[path_len - snap_len];
- if (!strcmp(path, priv->path)) {
+ if (gf_svc_get_entry_point(this, entry_point, sizeof(entry_point))) {
+ gf_smsg(this->name, GF_LOG_WARNING, op_errno,
+ SVC_MSG_COPY_ENTRY_POINT_FAILED, NULL);
+ goto out;
+ }
+
+ if (!strcmp(path, entry_point)) {
/*
* statfs call for virtual snap directory.
* Sent the fops to parent volume by removing
@@ -515,20 +567,24 @@ gf_svc_stat_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *buf,
dict_t *xdata)
{
- /* Consider a testcase:
+ /* TODO: FIX ME
+ * Consider a testcase:
* #mount -t nfs host1:/vol1 /mnt
* #ls /mnt
* #ls /mnt/.snaps (As expected this fails)
* #gluster volume set vol1 features.uss enable
- * Now `ls /mnt/.snaps` should work,
- * but fails with No such file or directory.
- * This is because NFS client caches the list of files in
- * a directory. This cache is updated if there are any changes
- * in the directory attributes. To solve this problem change
- * a attribute 'ctime' when USS is enabled
+ * Now `ls /mnt/.snaps` should work, but fails with No such file or
+ * directory. This is because NFS client (gNFS) caches the list of files
+ * in a directory. This cache is updated if there are any changes in the
+ * directory attributes. So, one way to solve this problem is to change
+ * 'ctime' attribute when USS is enabled as below.
+ *
+ * if (op_ret == 0 && IA_ISDIR(buf->ia_type))
+ * buf->ia_ctime_nsec++;
+ *
+ * But this is not the ideal solution as applications see the unexpected
+ * ctime change causing failures.
*/
- if (op_ret == 0 && IA_ISDIR(buf->ia_type))
- buf->ia_ctime_nsec++;
SVC_STACK_UNWIND(stat, frame, op_ret, op_errno, buf, xdata);
return 0;
@@ -637,8 +693,8 @@ gf_svc_opendir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
if (special_dir) {
svc_fd = svc_fd_ctx_get_or_new(this, fd);
if (!svc_fd) {
- gf_msg(this->name, GF_LOG_ERROR, 0, SVC_MSG_GET_FD_CONTEXT_FAILED,
- "fd context not found for %s", uuid_utoa(fd->inode->gfid));
+ gf_smsg(this->name, GF_LOG_ERROR, 0, SVC_MSG_GET_FD_CONTEXT_FAILED,
+ "gfid=%s", uuid_utoa(fd->inode->gfid), NULL);
goto out;
}
@@ -683,19 +739,17 @@ gf_svc_opendir(call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd,
local = mem_get0(this->local_pool);
if (!local) {
op_errno = ENOMEM;
- gf_msg(this->name, GF_LOG_ERROR, op_errno, SVC_MSG_NO_MEMORY,
- "failed to allocate memory for local "
- "(path: %s, gfid: %s)",
- loc->path, uuid_utoa(fd->inode->gfid));
+ gf_smsg(this->name, GF_LOG_ERROR, op_errno, SVC_MSG_NO_MEMORY,
+ "path=%s", loc->path, "gfid=%s", uuid_utoa(fd->inode->gfid),
+ NULL);
goto out;
}
+ loc_copy(&local->loc, loc);
+ frame->local = local;
SVC_GET_SUBVOL_FROM_CTX(this, op_ret, op_errno, inode_type, ret, loc->inode,
subvolume, out);
-
- loc_copy(&local->loc, loc);
local->subvolume = subvolume;
- frame->local = local;
STACK_WIND(frame, gf_svc_opendir_cbk, subvolume, subvolume->fops->opendir,
loc, fd, xdata);
@@ -728,11 +782,9 @@ gf_svc_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
if (ret < 0) {
op_ret = -1;
op_errno = EINVAL;
- gf_msg(this->name, GF_LOG_ERROR, op_errno,
- SVC_MSG_GET_INODE_CONTEXT_FAILED,
- "failed to get the inode context for %s "
- "(gfid: %s)",
- loc->path, uuid_utoa(loc->inode->gfid));
+ gf_smsg(this->name, GF_LOG_ERROR, op_errno,
+ SVC_MSG_GET_INODE_CONTEXT_FAILED, "path=%s", loc->path,
+ "gfid= %s", uuid_utoa(loc->inode->gfid), NULL);
goto out;
}
@@ -816,6 +868,9 @@ gf_svc_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
char attrname[PATH_MAX] = "";
char attrval[64] = "";
dict_t *dict = NULL;
+ char entry_point[NAME_MAX + 1] = {
+ 0,
+ };
GF_VALIDATE_OR_GOTO("svc", this, out);
GF_VALIDATE_OR_GOTO(this->name, frame, out);
@@ -839,14 +894,20 @@ gf_svc_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
strcat(attrname, ":");
if (!strcmp(attrname, GF_XATTR_GET_REAL_FILENAME_KEY)) {
- if (!strcasecmp(attrval, priv->path)) {
+ if (gf_svc_get_entry_point(this, entry_point, sizeof(entry_point))) {
+ gf_smsg(this->name, GF_LOG_WARNING, op_errno,
+ SVC_MSG_COPY_ENTRY_POINT_FAILED, NULL);
+ goto out;
+ }
+
+ if (!strcasecmp(attrval, entry_point)) {
dict = dict_new();
if (NULL == dict) {
op_errno = ENOMEM;
goto out;
}
- ret = dict_set_dynstr_with_alloc(dict, (char *)name, priv->path);
+ ret = dict_set_dynstr_with_alloc(dict, (char *)name, entry_point);
if (ret) {
op_errno = ENOMEM;
@@ -854,7 +915,7 @@ gf_svc_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
}
op_errno = 0;
- op_ret = strlen(priv->path) + 1;
+ op_ret = strlen(entry_point) + 1;
/* We should return from here */
goto out;
}
@@ -931,11 +992,9 @@ gf_svc_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict,
if (ret < 0) {
op_ret = -1;
op_errno = EINVAL;
- gf_msg(this->name, GF_LOG_ERROR, op_errno,
- SVC_MSG_GET_INODE_CONTEXT_FAILED,
- "failed to get inode context for %s "
- "(gfid: %s)",
- loc->name, uuid_utoa(loc->inode->gfid));
+ gf_smsg(this->name, GF_LOG_ERROR, op_errno,
+ SVC_MSG_GET_INODE_CONTEXT_FAILED, "name=%s", loc->name,
+ "gfid=%s", uuid_utoa(loc->inode->gfid), NULL);
goto out;
}
@@ -977,10 +1036,9 @@ gf_svc_fsetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict,
if (ret < 0) {
op_ret = -1;
op_errno = EINVAL;
- gf_msg(this->name, GF_LOG_ERROR, op_errno,
- SVC_MSG_GET_INODE_CONTEXT_FAILED,
- "failed to get inode context for %s",
- uuid_utoa(fd->inode->gfid));
+ gf_smsg(this->name, GF_LOG_ERROR, op_errno,
+ SVC_MSG_GET_INODE_CONTEXT_FAILED, "gfid=%s",
+ uuid_utoa(fd->inode->gfid), NULL);
goto out;
}
@@ -1022,11 +1080,9 @@ gf_svc_rmdir(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags,
if (ret < 0) {
op_ret = -1;
op_errno = EINVAL;
- gf_msg(this->name, GF_LOG_ERROR, op_errno,
- SVC_MSG_GET_INODE_CONTEXT_FAILED,
- "failed to get the inode context for %s "
- "(gfid: %s)",
- loc->name, uuid_utoa(loc->inode->gfid));
+ gf_smsg(this->name, GF_LOG_ERROR, op_errno,
+ SVC_MSG_GET_INODE_CONTEXT_FAILED, "name=%s", loc->name,
+ "gfid=%s", uuid_utoa(loc->inode->gfid), NULL);
goto out;
}
@@ -1062,8 +1118,8 @@ gf_svc_mkdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
inode_type = NORMAL_INODE;
ret = svc_inode_ctx_set(this, inode, inode_type);
if (ret)
- gf_msg(this->name, GF_LOG_ERROR, 0, SVC_MSG_SET_INODE_CONTEXT_FAILED,
- "failed to set inode context");
+ gf_smsg(this->name, GF_LOG_ERROR, 0, SVC_MSG_SET_INODE_CONTEXT_FAILED,
+ NULL);
out:
SVC_STACK_UNWIND(mkdir, frame, op_ret, op_errno, inode, buf, preparent,
@@ -1079,29 +1135,33 @@ gf_svc_mkdir(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
int ret = -1;
int op_ret = -1;
int op_errno = EINVAL;
- svc_private_t *priv = NULL;
gf_boolean_t wind = _gf_false;
+ char entry_point[NAME_MAX + 1] = {
+ 0,
+ };
GF_VALIDATE_OR_GOTO("svc", this, out);
- GF_VALIDATE_OR_GOTO(this->name, this->private, out);
GF_VALIDATE_OR_GOTO(this->name, frame, out);
GF_VALIDATE_OR_GOTO(this->name, loc, out);
GF_VALIDATE_OR_GOTO(this->name, loc->inode, out);
- priv = this->private;
-
ret = svc_inode_ctx_get(this, loc->parent, &parent_type);
if (ret < 0) {
op_ret = -1;
op_errno = EINVAL;
- gf_msg(this->name, GF_LOG_ERROR, op_errno,
- SVC_MSG_GET_INODE_CONTEXT_FAILED,
- "failed to get the inode context for %s",
- uuid_utoa(loc->parent->gfid));
+ gf_smsg(this->name, GF_LOG_ERROR, op_errno,
+ SVC_MSG_GET_INODE_CONTEXT_FAILED, "gfid=%s",
+ uuid_utoa(loc->parent->gfid), NULL);
+ goto out;
+ }
+
+ if (gf_svc_get_entry_point(this, entry_point, sizeof(entry_point))) {
+ gf_smsg(this->name, GF_LOG_WARNING, op_errno,
+ SVC_MSG_COPY_ENTRY_POINT_FAILED, NULL);
goto out;
}
- if (strcmp(loc->name, priv->path) && parent_type == NORMAL_INODE) {
+ if (strcmp(loc->name, entry_point) && parent_type == NORMAL_INODE) {
STACK_WIND(frame, gf_svc_mkdir_cbk, FIRST_CHILD(this),
FIRST_CHILD(this)->fops->mkdir, loc, mode, umask, xdata);
} else {
@@ -1134,8 +1194,8 @@ gf_svc_mknod_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
inode_type = NORMAL_INODE;
ret = svc_inode_ctx_set(this, inode, inode_type);
if (ret)
- gf_msg(this->name, GF_LOG_ERROR, 0, SVC_MSG_SET_INODE_CONTEXT_FAILED,
- "failed to set inode context");
+ gf_smsg(this->name, GF_LOG_ERROR, 0, SVC_MSG_SET_INODE_CONTEXT_FAILED,
+ NULL);
out:
SVC_STACK_UNWIND(mknod, frame, op_ret, op_errno, inode, buf, preparent,
@@ -1151,29 +1211,33 @@ gf_svc_mknod(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
int ret = -1;
int op_ret = -1;
int op_errno = EINVAL;
- svc_private_t *priv = NULL;
gf_boolean_t wind = _gf_false;
+ char entry_point[NAME_MAX + 1] = {
+ 0,
+ };
GF_VALIDATE_OR_GOTO("svc", this, out);
- GF_VALIDATE_OR_GOTO(this->name, this->private, out);
GF_VALIDATE_OR_GOTO(this->name, frame, out);
GF_VALIDATE_OR_GOTO(this->name, loc, out);
GF_VALIDATE_OR_GOTO(this->name, loc->inode, out);
- priv = this->private;
-
ret = svc_inode_ctx_get(this, loc->parent, &parent_type);
if (ret < 0) {
op_ret = -1;
op_errno = EINVAL;
- gf_msg(this->name, GF_LOG_ERROR, op_errno,
- SVC_MSG_GET_INODE_CONTEXT_FAILED,
- "failed to get the inode context for %s",
- uuid_utoa(loc->parent->gfid));
+ gf_smsg(this->name, GF_LOG_ERROR, op_errno,
+ SVC_MSG_GET_INODE_CONTEXT_FAILED, "gfid=%s",
+ uuid_utoa(loc->parent->gfid), NULL);
goto out;
}
- if (strcmp(loc->name, priv->path) && parent_type == NORMAL_INODE) {
+ if (gf_svc_get_entry_point(this, entry_point, sizeof(entry_point))) {
+ gf_smsg(this->name, GF_LOG_WARNING, op_errno,
+ SVC_MSG_COPY_ENTRY_POINT_FAILED, NULL);
+ goto out;
+ }
+
+ if (strcmp(loc->name, entry_point) && parent_type == NORMAL_INODE) {
STACK_WIND(frame, gf_svc_mknod_cbk, FIRST_CHILD(this),
FIRST_CHILD(this)->fops->mknod, loc, mode, rdev, umask,
xdata);
@@ -1254,8 +1318,8 @@ gf_svc_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
inode_type = NORMAL_INODE;
ret = svc_inode_ctx_set(this, inode, inode_type);
if (ret)
- gf_msg(this->name, GF_LOG_ERROR, 0, SVC_MSG_SET_INODE_CONTEXT_FAILED,
- "failed to set inode context");
+ gf_smsg(this->name, GF_LOG_ERROR, 0, SVC_MSG_SET_INODE_CONTEXT_FAILED,
+ NULL);
out:
SVC_STACK_UNWIND(create, frame, op_ret, op_errno, fd, inode, stbuf,
@@ -1272,30 +1336,34 @@ gf_svc_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
int ret = -1;
int op_ret = -1;
int op_errno = EINVAL;
- svc_private_t *priv = NULL;
gf_boolean_t wind = _gf_false;
+ char entry_point[NAME_MAX + 1] = {
+ 0,
+ };
GF_VALIDATE_OR_GOTO("svc", this, out);
- GF_VALIDATE_OR_GOTO(this->name, this->private, out);
GF_VALIDATE_OR_GOTO(this->name, frame, out);
GF_VALIDATE_OR_GOTO(this->name, loc, out);
GF_VALIDATE_OR_GOTO(this->name, loc->inode, out);
GF_VALIDATE_OR_GOTO(this->name, fd, out);
- priv = this->private;
-
ret = svc_inode_ctx_get(this, loc->parent, &parent_type);
if (ret < 0) {
op_ret = -1;
op_errno = EINVAL;
- gf_msg(this->name, GF_LOG_ERROR, op_errno,
- SVC_MSG_GET_INODE_CONTEXT_FAILED,
- "failed to get the inode context for %s",
- uuid_utoa(loc->parent->gfid));
+ gf_smsg(this->name, GF_LOG_ERROR, op_errno,
+ SVC_MSG_GET_INODE_CONTEXT_FAILED, "gfid=%s",
+ uuid_utoa(loc->parent->gfid), NULL);
+ goto out;
+ }
+
+ if (gf_svc_get_entry_point(this, entry_point, sizeof(entry_point))) {
+ gf_smsg(this->name, GF_LOG_WARNING, op_errno,
+ SVC_MSG_COPY_ENTRY_POINT_FAILED, NULL);
goto out;
}
- if (strcmp(loc->name, priv->path) && parent_type == NORMAL_INODE) {
+ if (strcmp(loc->name, entry_point) && parent_type == NORMAL_INODE) {
STACK_WIND(frame, gf_svc_create_cbk, FIRST_CHILD(this),
FIRST_CHILD(this)->fops->create, loc, flags, mode, umask, fd,
xdata);
@@ -1329,8 +1397,8 @@ gf_svc_symlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
inode_type = NORMAL_INODE;
ret = svc_inode_ctx_set(this, inode, inode_type);
if (ret)
- gf_msg(this->name, GF_LOG_ERROR, 0, SVC_MSG_SET_INODE_CONTEXT_FAILED,
- "failed to set inode context");
+ gf_smsg(this->name, GF_LOG_ERROR, 0, SVC_MSG_SET_INODE_CONTEXT_FAILED,
+ NULL);
out:
SVC_STACK_UNWIND(symlink, frame, op_ret, op_errno, inode, buf, preparent,
@@ -1347,29 +1415,33 @@ gf_svc_symlink(call_frame_t *frame, xlator_t *this, const char *linkpath,
int op_ret = -1;
int op_errno = EINVAL;
int ret = -1;
- svc_private_t *priv = NULL;
gf_boolean_t wind = _gf_false;
+ char entry_point[NAME_MAX + 1] = {
+ 0,
+ };
GF_VALIDATE_OR_GOTO("svc", this, out);
GF_VALIDATE_OR_GOTO(this->name, frame, out);
- GF_VALIDATE_OR_GOTO(this->name, this->private, out);
GF_VALIDATE_OR_GOTO(this->name, loc, out);
GF_VALIDATE_OR_GOTO(this->name, loc->inode, out);
- priv = this->private;
-
ret = svc_inode_ctx_get(this, loc->parent, &parent_type);
if (ret < 0) {
op_ret = -1;
op_errno = EINVAL;
- gf_msg(this->name, GF_LOG_ERROR, op_errno,
- SVC_MSG_GET_INODE_CONTEXT_FAILED,
- "failed to get the inode context for %s",
- uuid_utoa(loc->parent->gfid));
+ gf_smsg(this->name, GF_LOG_ERROR, op_errno,
+ SVC_MSG_GET_INODE_CONTEXT_FAILED, "gfid=%s",
+ uuid_utoa(loc->parent->gfid), NULL);
goto out;
}
- if (strcmp(loc->name, priv->path) && parent_type == NORMAL_INODE) {
+ if (gf_svc_get_entry_point(this, entry_point, sizeof(entry_point))) {
+ gf_smsg(this->name, GF_LOG_WARNING, op_errno,
+ SVC_MSG_COPY_ENTRY_POINT_FAILED, NULL);
+ goto out;
+ }
+
+ if (strcmp(loc->name, entry_point) && parent_type == NORMAL_INODE) {
STACK_WIND(frame, gf_svc_symlink_cbk, FIRST_CHILD(this),
FIRST_CHILD(this)->fops->symlink, linkpath, loc, umask,
xdata);
@@ -1407,11 +1479,9 @@ gf_svc_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags,
if (ret < 0) {
op_ret = -1;
op_errno = EINVAL;
- gf_msg(this->name, GF_LOG_ERROR, op_errno,
- SVC_MSG_GET_INODE_CONTEXT_FAILED,
- "failed to get the inode context "
- "for %s",
- uuid_utoa(loc->parent->gfid));
+ gf_smsg(this->name, GF_LOG_ERROR, op_errno,
+ SVC_MSG_GET_INODE_CONTEXT_FAILED, "gfid=%s",
+ uuid_utoa(loc->parent->gfid), NULL);
goto out;
}
@@ -1533,14 +1603,13 @@ gf_svc_readdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
gf_dirent_t *entry = NULL;
gf_dirent_t *tmpentry = NULL;
svc_local_t *local = NULL;
- svc_private_t *priv = NULL;
+ char entry_point[NAME_MAX + 1] = {
+ 0,
+ };
if (op_ret < 0)
goto out;
- GF_VALIDATE_OR_GOTO(this->name, this->private, out);
-
- priv = this->private;
local = frame->local;
/* If .snaps pre-exists, then it should not be listed
@@ -1551,9 +1620,23 @@ gf_svc_readdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
if (local->subvolume != FIRST_CHILD(this))
goto out;
+ /*
+ * Better to goto out if getting the entry point
+ * fails. We might end up sending the directory
+ * entry for the snapview entry point in the readdir
+ * response. But, the intention is to avoid the race
+ * condition where priv->path is being changed in
+ * reconfigure while this is accessing it.
+ */
+ if (gf_svc_get_entry_point(this, entry_point, sizeof(entry_point))) {
+ gf_smsg(this->name, GF_LOG_WARNING, op_errno,
+ SVC_MSG_COPY_ENTRY_POINT_FAILED, NULL);
+ goto out;
+ }
+
list_for_each_entry_safe(entry, tmpentry, &entries->list, list)
{
- if (strcmp(priv->path, entry->d_name) == 0)
+ if (strcmp(entry_point, entry->d_name) == 0)
gf_dirent_entry_free(entry);
}
@@ -1585,9 +1668,8 @@ gf_svc_readdir(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
svc_fd = svc_fd_ctx_get_or_new(this, fd);
if (!svc_fd)
- gf_msg(this->name, GF_LOG_ERROR, 0, SVC_MSG_GET_FD_CONTEXT_FAILED,
- "failed to get the fd context for inode %s",
- uuid_utoa(fd->inode->gfid));
+ gf_smsg(this->name, GF_LOG_ERROR, 0, SVC_MSG_GET_FD_CONTEXT_FAILED,
+ "gfid=%s", uuid_utoa(fd->inode->gfid), NULL);
else {
if (svc_fd->entry_point_handled && off == svc_fd->last_offset) {
op_ret = 0;
@@ -1601,9 +1683,8 @@ gf_svc_readdir(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
local = mem_get0(this->local_pool);
if (!local) {
- gf_msg(this->name, GF_LOG_ERROR, op_errno, SVC_MSG_NO_MEMORY,
- "failed to allocate local (inode: %s)",
- uuid_utoa(fd->inode->gfid));
+ gf_smsg(this->name, GF_LOG_ERROR, op_errno, SVC_MSG_NO_MEMORY,
+ "inode-gfid=%s", uuid_utoa(fd->inode->gfid), NULL);
goto out;
}
local->subvolume = subvolume;
@@ -1655,17 +1736,16 @@ gf_svc_readdirp_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
{
gf_dirent_t entries;
gf_dirent_t *entry = NULL;
- svc_private_t *private = NULL;
svc_fd_t *svc_fd = NULL;
svc_local_t *local = NULL;
int inode_type = -1;
int ret = -1;
+ char entry_point[NAME_MAX + 1] = {
+ 0,
+ };
GF_VALIDATE_OR_GOTO("snapview-client", this, out);
- GF_VALIDATE_OR_GOTO(this->name, this->private, out);
- private
- = this->private;
INIT_LIST_HEAD(&entries.list);
local = frame->local;
@@ -1685,18 +1765,25 @@ gf_svc_readdirp_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
svc_fd = svc_fd_ctx_get(this, local->fd);
if (!svc_fd) {
- gf_msg(this->name, GF_LOG_ERROR, 0, SVC_MSG_GET_FD_CONTEXT_FAILED,
- "failed to get the fd context for the inode %s",
- uuid_utoa(local->fd->inode->gfid));
+ gf_smsg(this->name, GF_LOG_ERROR, 0, SVC_MSG_GET_FD_CONTEXT_FAILED,
+ "gfid=%s", uuid_utoa(local->fd->inode->gfid), NULL);
+ op_ret = 0;
+ op_errno = ENOENT;
+ goto out;
+ }
+
+ if (gf_svc_get_entry_point(this, entry_point, sizeof(entry_point))) {
+ gf_smsg(this->name, GF_LOG_WARNING, 0, SVC_MSG_COPY_ENTRY_POINT_FAILED,
+ NULL);
op_ret = 0;
op_errno = ENOENT;
goto out;
}
- entry = gf_dirent_for_name(private->path);
+ entry = gf_dirent_for_name(entry_point);
if (!entry) {
- gf_msg(this->name, GF_LOG_ERROR, 0, SVC_MSG_NO_MEMORY,
- "failed to allocate memory for the entry %s", private->path);
+ gf_smsg(this->name, GF_LOG_ERROR, 0, SVC_MSG_NO_MEMORY,
+ "entry-point=%s", entry_point, NULL);
op_ret = 0;
op_errno = ENOMEM;
goto out;
@@ -1710,9 +1797,8 @@ gf_svc_readdirp_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
inode_type = VIRTUAL_INODE;
ret = svc_inode_ctx_set(this, entry->inode, inode_type);
if (ret)
- gf_msg(this->name, GF_LOG_ERROR, 0, SVC_MSG_SET_INODE_CONTEXT_FAILED,
- "failed to set the inode context for the entry %s",
- entry->d_name);
+ gf_smsg(this->name, GF_LOG_ERROR, 0, SVC_MSG_SET_INODE_CONTEXT_FAILED,
+ "entry-name=%s", entry->d_name, NULL);
list_add_tail(&entry->list, &entries.list);
op_ret = 1;
@@ -1732,18 +1818,16 @@ int
gf_svc_special_dir_revalidate_lookup(call_frame_t *frame, xlator_t *this,
dict_t *xdata)
{
- svc_private_t *private = NULL;
svc_local_t *local = NULL;
loc_t *loc = NULL;
dict_t *tmp_xdata = NULL;
char *path = NULL;
int ret = -1;
+ char entry_point[NAME_MAX + 1] = {
+ 0,
+ };
GF_VALIDATE_OR_GOTO("snapview-client", this, out);
- GF_VALIDATE_OR_GOTO(this->name, this->private, out);
-
- private
- = this->private;
local = frame->local;
loc = &local->loc;
@@ -1759,13 +1843,19 @@ gf_svc_special_dir_revalidate_lookup(call_frame_t *frame, xlator_t *this,
inode_unref(loc->inode);
loc->inode = inode_new(loc->parent->table);
if (!loc->inode) {
- gf_msg(this->name, GF_LOG_ERROR, ENOMEM, SVC_MSG_NO_MEMORY,
- "failed to allocate new inode");
+ gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, SVC_MSG_ALLOC_INODE_FAILED,
+ NULL);
+ goto out;
+ }
+
+ if (gf_svc_get_entry_point(this, entry_point, sizeof(entry_point))) {
+ gf_smsg(this->name, GF_LOG_WARNING, 0, SVC_MSG_COPY_ENTRY_POINT_FAILED,
+ NULL);
goto out;
}
gf_uuid_copy(local->loc.gfid, loc->inode->gfid);
- ret = inode_path(loc->parent, private->path, &path);
+ ret = inode_path(loc->parent, entry_point, &path);
if (ret < 0)
goto out;
@@ -1790,8 +1880,7 @@ gf_svc_special_dir_revalidate_lookup(call_frame_t *frame, xlator_t *this,
ret = dict_set_str(tmp_xdata, "entry-point", "true");
if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, SVC_MSG_DICT_SET_FAILED,
- "failed to set dict");
+ gf_smsg(this->name, GF_LOG_ERROR, 0, SVC_MSG_DICT_SET_FAILED, NULL);
goto out;
}
@@ -1820,6 +1909,9 @@ gf_svc_readdir_on_special_dir(call_frame_t *frame, void *cookie, xlator_t *this,
int ret = -1;
gf_boolean_t unwind = _gf_true;
svc_fd_t *svc_fd = NULL;
+ char entry_point[NAME_MAX + 1] = {
+ 0,
+ };
GF_VALIDATE_OR_GOTO("snapview-client", this, out);
GF_VALIDATE_OR_GOTO(this->name, this->private, out);
@@ -1832,9 +1924,8 @@ gf_svc_readdir_on_special_dir(call_frame_t *frame, void *cookie, xlator_t *this,
fd = local->fd;
svc_fd = svc_fd_ctx_get(this, fd);
if (!svc_fd) {
- gf_msg(this->name, GF_LOG_ERROR, 0, SVC_MSG_GET_FD_CONTEXT_FAILED,
- "failed to get the fd context for inode %s",
- uuid_utoa(fd->inode->gfid));
+ gf_smsg(this->name, GF_LOG_ERROR, 0, SVC_MSG_GET_FD_CONTEXT_FAILED,
+ "gfid=%s", uuid_utoa(fd->inode->gfid), NULL);
goto out;
}
@@ -1850,12 +1941,18 @@ gf_svc_readdir_on_special_dir(call_frame_t *frame, void *cookie, xlator_t *this,
if (op_ret == 0 && op_errno == ENOENT && private->special_dir &&
strcmp(private->special_dir, "") && svc_fd->special_dir &&
local->subvolume == FIRST_CHILD(this)) {
- inode = inode_grep(fd->inode->table, fd->inode, private->path);
+ if (gf_svc_get_entry_point(this, entry_point, sizeof(entry_point))) {
+ gf_smsg(this->name, GF_LOG_WARNING, 0,
+ SVC_MSG_GET_FD_CONTEXT_FAILED, NULL);
+ goto out;
+ }
+
+ inode = inode_grep(fd->inode->table, fd->inode, entry_point);
if (!inode) {
inode = inode_new(fd->inode->table);
if (!inode) {
- gf_msg(this->name, GF_LOG_ERROR, 0, SVC_MSG_NO_MEMORY,
- "failed to allocate new inode");
+ gf_smsg(this->name, GF_LOG_ERROR, 0, SVC_MSG_ALLOC_INODE_FAILED,
+ NULL);
goto out;
}
}
@@ -1863,7 +1960,7 @@ gf_svc_readdir_on_special_dir(call_frame_t *frame, void *cookie, xlator_t *this,
gf_uuid_copy(local->loc.pargfid, fd->inode->gfid);
gf_uuid_copy(local->loc.gfid, inode->gfid);
if (gf_uuid_is_null(inode->gfid))
- ret = inode_path(fd->inode, private->path, &path);
+ ret = inode_path(fd->inode, entry_point, &path);
else
ret = inode_path(inode, NULL, &path);
@@ -1885,8 +1982,7 @@ gf_svc_readdir_on_special_dir(call_frame_t *frame, void *cookie, xlator_t *this,
goto out;
ret = dict_set_str(tmp_xdata, "entry-point", "true");
if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, LG_MSG_DICT_SET_FAILED,
- "failed to set dict");
+ gf_smsg(this->name, GF_LOG_ERROR, 0, SVC_MSG_DICT_SET_FAILED, NULL);
goto out;
}
@@ -1923,21 +2019,21 @@ gf_svc_readdirp_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int ret = -1;
svc_fd_t *svc_fd = NULL;
gf_boolean_t unwind = _gf_true;
- svc_private_t *priv = NULL;
+ char entry_point[NAME_MAX + 1] = {
+ 0,
+ };
if (op_ret < 0)
goto out;
GF_VALIDATE_OR_GOTO("snapview-client", this, out);
- GF_VALIDATE_OR_GOTO(this->name, this->private, out);
- priv = this->private;
+
local = frame->local;
svc_fd = svc_fd_ctx_get(this, local->fd);
if (!svc_fd) {
- gf_msg(this->name, GF_LOG_WARNING, 0, SVC_MSG_GET_FD_CONTEXT_FAILED,
- "failed to get the fd context for gfid %s",
- uuid_utoa(local->fd->inode->gfid));
+ gf_smsg(this->name, GF_LOG_WARNING, 0, SVC_MSG_GET_FD_CONTEXT_FAILED,
+ "gfid=%s", uuid_utoa(local->fd->inode->gfid), NULL);
}
if (local->subvolume == FIRST_CHILD(this))
@@ -1945,6 +2041,19 @@ gf_svc_readdirp_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
else
inode_type = VIRTUAL_INODE;
+ /*
+ * Better to goto out and return whatever is there in the
+ * readdirp response (even if the readdir response contains
+ * a directory entry for the snapshot entry point). Otherwise
+ * if we ignore the error, then there is a chance of race
+ * condition where, priv->path is changed in reconfigure
+ */
+ if (gf_svc_get_entry_point(this, entry_point, sizeof(entry_point))) {
+ gf_smsg(this->name, GF_LOG_WARNING, 0, SVC_MSG_COPY_ENTRY_POINT_FAILED,
+ NULL);
+ goto out;
+ }
+
list_for_each_entry_safe(entry, tmpentry, &entries->list, list)
{
/* If .snaps pre-exists, then it should not be listed
@@ -1952,7 +2061,7 @@ gf_svc_readdirp_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
* so filter the .snaps entry if exists.
* However it is OK to list .snaps in VIRTUAL world
*/
- if (inode_type == NORMAL_INODE && !strcmp(priv->path, entry->d_name)) {
+ if (inode_type == NORMAL_INODE && !strcmp(entry_point, entry->d_name)) {
gf_dirent_entry_free(entry);
continue;
}
@@ -1962,9 +2071,8 @@ gf_svc_readdirp_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
ret = svc_inode_ctx_set(this, entry->inode, inode_type);
if (ret)
- gf_msg(this->name, GF_LOG_ERROR, 0,
- SVC_MSG_SET_INODE_CONTEXT_FAILED,
- "failed to set inode context");
+ gf_smsg(this->name, GF_LOG_ERROR, 0,
+ SVC_MSG_SET_INODE_CONTEXT_FAILED, NULL);
if (svc_fd)
svc_fd->last_offset = entry->d_off;
}
@@ -2003,8 +2111,7 @@ gf_svc_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
local = mem_get0(this->local_pool);
if (!local) {
op_errno = ENOMEM;
- gf_msg(this->name, GF_LOG_ERROR, op_errno, SVC_MSG_NO_MEMORY,
- "failed to allocate local");
+ gf_smsg(this->name, GF_LOG_ERROR, op_errno, SVC_MSG_NO_MEMORY, NULL);
goto out;
}
@@ -2019,9 +2126,8 @@ gf_svc_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
*/
svc_fd = svc_fd_ctx_get_or_new(this, fd);
if (!svc_fd)
- gf_msg(this->name, GF_LOG_ERROR, 0, SVC_MSG_GET_FD_CONTEXT_FAILED,
- "failed to get the fd context for the inode %s",
- uuid_utoa(fd->inode->gfid));
+ gf_smsg(this->name, GF_LOG_ERROR, 0, SVC_MSG_GET_FD_CONTEXT_FAILED,
+ "gfid=%s", uuid_utoa(fd->inode->gfid), NULL);
else {
if (svc_fd->entry_point_handled && off == svc_fd->last_offset) {
op_ret = 0;
@@ -2076,22 +2182,17 @@ gf_svc_rename(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
if (ret < 0) {
op_ret = -1;
op_errno = EINVAL;
- gf_msg(this->name, GF_LOG_ERROR, op_errno,
- SVC_MSG_GET_INODE_CONTEXT_FAILED,
- "failed to get the context for the inode "
- "%s",
- uuid_utoa(oldloc->inode->gfid));
+ gf_smsg(this->name, GF_LOG_ERROR, op_errno,
+ SVC_MSG_GET_INODE_CONTEXT_FAILED, "gfid=%s",
+ uuid_utoa(oldloc->inode->gfid), NULL);
goto out;
}
if (src_inode_type == VIRTUAL_INODE) {
op_ret = -1;
op_errno = EROFS;
- gf_msg(this->name, GF_LOG_ERROR, op_errno,
- SVC_MSG_RENAME_SNAPSHOT_ENTRY,
- "rename happening on a entry %s "
- "residing in snapshot",
- oldloc->name);
+ gf_smsg(this->name, GF_LOG_ERROR, op_errno,
+ SVC_MSG_RENAME_SNAPSHOT_ENTRY, "name=%s", oldloc->name, NULL);
goto out;
}
@@ -2100,11 +2201,9 @@ gf_svc_rename(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
if (!ret && dst_inode_type == VIRTUAL_INODE) {
op_ret = -1;
op_errno = EROFS;
- gf_msg(this->name, GF_LOG_ERROR, op_errno,
- SVC_MSG_RENAME_SNAPSHOT_ENTRY,
- "rename of %s happening to a entry "
- "%s residing in snapshot",
- oldloc->name, newloc->name);
+ gf_smsg(this->name, GF_LOG_ERROR, op_errno,
+ SVC_MSG_RENAME_SNAPSHOT_ENTRY, "oldloc-name=%s",
+ oldloc->name, "newloc-name=%s", newloc->name, NULL);
goto out;
}
}
@@ -2114,11 +2213,9 @@ gf_svc_rename(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
if (!ret && dst_parent_type == VIRTUAL_INODE) {
op_ret = -1;
op_errno = EROFS;
- gf_msg(this->name, GF_LOG_ERROR, op_errno,
- SVC_MSG_RENAME_SNAPSHOT_ENTRY,
- "rename of %s happening to a entry %s "
- "residing in snapshot",
- oldloc->name, newloc->name);
+ gf_smsg(this->name, GF_LOG_ERROR, op_errno,
+ SVC_MSG_RENAME_SNAPSHOT_ENTRY, "oldloc-name=%s",
+ oldloc->name, "newloc-name=%s", newloc->name, NULL);
goto out;
}
}
@@ -2160,9 +2257,8 @@ gf_svc_link(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
if (!ret && src_inode_type == VIRTUAL_INODE) {
op_ret = -1;
op_errno = EROFS;
- gf_msg(this->name, GF_LOG_ERROR, op_errno, SVC_MSG_LINK_SNAPSHOT_ENTRY,
- "link happening on a entry %s residing in snapshot",
- oldloc->name);
+ gf_smsg(this->name, GF_LOG_ERROR, op_errno, SVC_MSG_LINK_SNAPSHOT_ENTRY,
+ "oldloc-name=%s", oldloc->name, NULL);
goto out;
}
@@ -2170,10 +2266,9 @@ gf_svc_link(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
if (!ret && dst_parent_type == VIRTUAL_INODE) {
op_ret = -1;
op_errno = EROFS;
- gf_msg(this->name, GF_LOG_ERROR, op_errno, SVC_MSG_LINK_SNAPSHOT_ENTRY,
- "link of %s happening to a entry %s "
- "residing in snapshot",
- oldloc->name, newloc->name);
+ gf_smsg(this->name, GF_LOG_ERROR, op_errno, SVC_MSG_LINK_SNAPSHOT_ENTRY,
+ "oldloc-name=%s", oldloc->name, "newloc-name=%s", newloc->name,
+ NULL);
goto out;
}
@@ -2208,11 +2303,9 @@ gf_svc_removexattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
if (ret < 0) {
op_ret = -1;
op_errno = EINVAL;
- gf_msg(this->name, GF_LOG_ERROR, op_errno,
- SVC_MSG_GET_INODE_CONTEXT_FAILED,
- "failed to get inode context for %s "
- "(gfid: %s)",
- loc->path, uuid_utoa(loc->inode->gfid));
+ gf_smsg(this->name, GF_LOG_ERROR, op_errno,
+ SVC_MSG_GET_INODE_CONTEXT_FAILED, "path=%s", loc->path,
+ "gfid=%s", uuid_utoa(loc->inode->gfid), NULL);
goto out;
}
@@ -2253,10 +2346,9 @@ gf_svc_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int datasync,
if (ret < 0) {
op_ret = -1;
op_errno = EINVAL;
- gf_msg(this->name, GF_LOG_ERROR, op_errno,
- SVC_MSG_GET_INODE_CONTEXT_FAILED,
- "failed to get inode context for %s",
- uuid_utoa(fd->inode->gfid));
+ gf_smsg(this->name, GF_LOG_ERROR, op_errno,
+ SVC_MSG_GET_INODE_CONTEXT_FAILED, "gfid=%s",
+ uuid_utoa(fd->inode->gfid), NULL);
goto out;
}
@@ -2340,8 +2432,9 @@ gf_svc_forget(xlator_t *this, inode_t *inode)
ret = inode_ctx_del(inode, this, &value);
if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, SVC_MSG_DELETE_INODE_CONTEXT_FAILED,
- "failed to delete inode context for %s", uuid_utoa(inode->gfid));
+ gf_smsg(this->name, GF_LOG_ERROR, 0,
+ SVC_MSG_DELETE_INODE_CONTEXT_FAILED, "gfid=%s",
+ uuid_utoa(inode->gfid), NULL);
goto out;
}
@@ -2349,16 +2442,112 @@ out:
return 0;
}
+static int /* returns 0 on success, -1 when priv is NULL */
+gf_svc_priv_destroy(xlator_t *this, svc_private_t *priv)
+{ /* Frees priv's strings, its lock and the struct itself, then drops this->local_pool. */
+ int ret = -1;
+
+ if (!priv) { /* nothing to release: log a warning and report failure */
+ gf_smsg(this->name, GF_LOG_WARNING, 0, SVC_MSG_NULL_PRIV, NULL);
+ goto out;
+ }
+
+ GF_FREE(priv->path);
+ GF_FREE(priv->special_dir);
+
+ LOCK_DESTROY(&priv->lock);
+
+ GF_FREE(priv); /* priv is dangling from here on; callers must not reuse it */
+
+ if (this->local_pool) { /* the xlator's local pool is torn down here too, not only priv */
+ mem_pool_destroy(this->local_pool);
+ this->local_pool = NULL;
+ }
+
+ ret = 0;
+
+out:
+ return ret;
+}
+
+/**
+ * ** NOTE **:
+ * =============
+ * The option "snapdir-entry-path" is NOT reconfigurable.
+ * That option as of now is only for the consumption of
+ * samba, where, it needs to tell glusterfs about the
+ * directory that is shared with windows client for the
+ * access. Now, in windows-explorer (GUI) interface, for
+ * the directory shared, the entry point to the snapshot
+ * world (snapshot-directory option) should be visible,
+ * at least as a hidden entry. For that to happen, glusterfs
+ * has to send that entry in the readdir response coming on
+ * the directory used as the smb share. Therefore, samba,
+ * while initializing the gluster volume (via gfapi) sets
+ * the xlator option "snapdir-entry-path" to the directory
+ * which is to be shared with windows (check the file
+ * vfs_glusterfs.c from samba source code). So to avoid
+ * problems with smb access, not allowing snapdir-entry-path
+ * option to be configurable. That option is for those
+ * consumers who know what they are doing.
+ **/
int
reconfigure(xlator_t *this, dict_t *options)
{
svc_private_t *priv = NULL;
+ char *path = NULL;
+ gf_boolean_t show_entry_point = _gf_false;
+ char *tmp = NULL;
priv = this->private;
- GF_OPTION_RECONF("snapshot-directory", priv->path, options, str, out);
- GF_OPTION_RECONF("show-snapshot-directory", priv->show_entry_point, options,
- bool, out);
+ GF_OPTION_RECONF("snapshot-directory", path, options, str, out);
+ if (!path || (strlen(path) > NAME_MAX) || path[0] != '.') {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, SVC_MSG_INVALID_ENTRY_POINT,
+ "path=%s", path, NULL);
+ goto out;
+ }
+
+ GF_OPTION_RECONF("show-snapshot-directory", show_entry_point, options, bool,
+ out);
+
+ /*
+ * The assumption now is that priv->path is an allocated memory (either
+ * in init or in a previous reconfigure).
+ * So, the intention here is to preserve the older contents of the option
+ * until the new option's value has been completely stored in the priv.
+ * So, do this.
+ * - Store the pointer of priv->path in a temporary pointer.
+ * - Allocate new memory for the new value of the option that is just
+ * obtained from the above call to GF_OPTION_RECONF.
+ * - If the above allocation fails, again set the pointer from priv
+ * to the address stored in tmp. i.e. the previous value.
+ * - If the allocation succeeds, then free the tmp pointer.
+ * WARNING: Before changing the allocation and freeing logic of
+ * priv->path, always check the init function to see how
+ * priv->path is set. Take decisions accordingly. As of now,
+ * the assumption is that, the string elements of private
+ * structure of snapview-client are allocated (either in
+ * init or here in reconfigure).
+ */
+ LOCK(&priv->lock);
+ {
+ tmp = priv->path;
+ priv->path = NULL;
+ priv->path = gf_strdup(path);
+ if (!priv->path) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "failed to reconfigure snapshot-directory option to %s",
+ path);
+ priv->path = tmp;
+ } else {
+ GF_FREE(tmp);
+ tmp = NULL;
+ }
+
+ priv->show_entry_point = show_entry_point;
+ }
+ UNLOCK(&priv->lock);
out:
return 0;
@@ -2375,9 +2564,7 @@ mem_acct_init(xlator_t *this)
ret = xlator_mem_acct_init(this, gf_svc_mt_end + 1);
if (ret != 0) {
- gf_msg(this->name, GF_LOG_WARNING, 0, SVC_MSG_MEM_ACNT_FAILED,
- "Memory accounting"
- " init failed");
+ gf_smsg(this->name, GF_LOG_WARNING, 0, SVC_MSG_MEM_ACNT_FAILED, NULL);
}
return ret;
@@ -2390,10 +2577,11 @@ init(xlator_t *this)
int ret = -1;
int children = 0;
xlator_list_t *xl = NULL;
+ char *path = NULL;
+ char *special_dir = NULL;
if (!this->children) {
- gf_msg(this->name, GF_LOG_ERROR, 0, SVC_MSG_NO_CHILD_FOR_XLATOR,
- "configured without any child");
+ gf_smsg(this->name, GF_LOG_ERROR, 0, SVC_MSG_NO_CHILD_FOR_XLATOR, NULL);
goto out;
}
@@ -2404,11 +2592,8 @@ init(xlator_t *this)
}
if (children != 2) {
- gf_msg(this->name, GF_LOG_ERROR, 0, SVC_MSG_XLATOR_CHILDREN_WRONG,
- "snap-view-client "
- "has got %d subvolumes. It can have only 2 "
- "subvolumes.",
- children);
+ gf_smsg(this->name, GF_LOG_ERROR, 0, SVC_MSG_XLATOR_CHILDREN_WRONG,
+ "subvol-num=%d", children, NULL);
goto out;
}
@@ -2424,34 +2609,59 @@ init(xlator_t *this)
if (!private)
goto out;
- GF_OPTION_INIT("snapshot-directory", private->path, str, out);
- GF_OPTION_INIT("snapdir-entry-path", private->special_dir, str, out);
- GF_OPTION_INIT("show-snapshot-directory", private->show_entry_point, bool,
- out);
+ LOCK_INIT(&private->lock);
- if (strstr(private->special_dir, private->path)) {
- gf_msg(this->name, GF_LOG_ERROR, 0, SVC_MSG_ENTRY_POINT_SPECIAL_DIR,
- "entry point "
- "directory cannot be part of the special directory");
- GF_FREE(private->special_dir);
- private
- ->special_dir = NULL;
+ GF_OPTION_INIT("snapshot-directory", path, str, out);
+ if (!path || (strlen(path) > NAME_MAX) || path[0] != '.') {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, SVC_MSG_INVALID_ENTRY_POINT,
+ "path=%s", path, NULL);
goto out;
}
- this->private = private;
+ private
+ ->path = gf_strdup(path);
+ if (!private->path) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, SVC_MSG_NO_MEMORY,
+ "entry-point-path=%s", path, NULL);
+ goto out;
+ }
+
+ GF_OPTION_INIT("snapdir-entry-path", special_dir, str, out);
+ if (!special_dir || strstr(special_dir, path)) { /* reject a missing special dir, or one that contains the entry-point name */
+ if (special_dir)
+ gf_smsg(this->name, GF_LOG_ERROR, 0,
+ SVC_MSG_ENTRY_POINT_SPECIAL_DIR, "path=%s", path,
+ "special-dir=%s", special_dir, NULL); /* NULL ends the key/value list, as in every other gf_smsg call here */
+ else
+ gf_smsg(this->name, GF_LOG_ERROR, 0, SVC_MSG_NULL_SPECIAL_DIR,
+ NULL);
+ goto out;
+ }
+
+ private
+ ->special_dir = gf_strdup(special_dir);
+ if (!private->special_dir) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, SVC_MSG_NO_MEMORY,
+ "special-directory=%s", special_dir, NULL);
+ goto out;
+ }
+
+ GF_OPTION_INIT("show-snapshot-directory", private->show_entry_point, bool,
+ out);
+
this->local_pool = mem_pool_new(svc_local_t, 128);
if (!this->local_pool) {
- gf_msg(this->name, GF_LOG_ERROR, 0, SVC_MSG_NO_MEMORY,
- "could not get mem pool for frame->local");
+ gf_smsg(this->name, GF_LOG_ERROR, 0, SVC_MSG_MEM_POOL_GET_FAILED, NULL);
goto out;
}
+ this->private = private;
+
ret = 0;
out:
if (ret)
- GF_FREE(private);
+ (void)gf_svc_priv_destroy(this, private);
return ret;
}
@@ -2468,9 +2678,15 @@ fini(xlator_t *this)
if (!priv)
return;
- this->private = NULL;
+ /*
+ * Just log the failure and go ahead to
+ * set this->private to NULL.
+ */
+ if (gf_svc_priv_destroy(this, priv))
+ gf_smsg(this->name, GF_LOG_WARNING, 0, SVC_MSG_PRIV_DESTROY_FAILED,
+ NULL);
- GF_FREE(priv);
+ this->private = NULL;
return;
}
@@ -2559,3 +2775,17 @@ struct volume_options options[] = {
},
{.key = {NULL}},
};
+
+xlator_api_t xlator_api = { /* snapview-client's table of entry points, fops, callbacks and options */
+ .init = init,
+ .fini = fini,
+ .notify = notify,
+ .reconfigure = reconfigure,
+ .mem_acct_init = mem_acct_init,
+ .op_version = {1},
+ .fops = &fops,
+ .cbks = &cbks,
+ .options = options,
+ .identifier = "snapview-client",
+ .category = GF_MAINTAINED,
+};
diff --git a/xlators/features/snapview-client/src/snapview-client.h b/xlators/features/snapview-client/src/snapview-client.h
index d19a796f101..166116a439d 100644
--- a/xlators/features/snapview-client/src/snapview-client.h
+++ b/xlators/features/snapview-client/src/snapview-client.h
@@ -10,11 +10,11 @@
#ifndef __SNAP_VIEW_CLIENT_H__
#define __SNAP_VIEW_CLIENT_H__
-#include "glusterfs.h"
-#include "logging.h"
-#include "dict.h"
-#include "xlator.h"
-#include "defaults.h"
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/logging.h>
+#include <glusterfs/dict.h>
+#include <glusterfs/xlator.h>
+#include <glusterfs/defaults.h>
#include "snapview-client-mem-types.h"
#include "snapview-client-messages.h"
@@ -39,8 +39,8 @@ typedef struct __svc_local svc_local_t;
svc_local_free(__local); \
} while (0)
-#define SVC_ENTRY_POINT_SET(this, xdata, op_ret, op_errno, new_xdata, priv, \
- ret, label) \
+#define SVC_ENTRY_POINT_SET(this, xdata, op_ret, op_errno, new_xdata, ret, \
+ label) \
do { \
if (!xdata) { \
xdata = new_xdata = dict_new(); \
@@ -81,6 +81,7 @@ struct svc_private {
char *path;
char *special_dir; /* needed for samba */
gf_boolean_t show_entry_point;
+ gf_lock_t lock; /* mainly to guard private->path */
};
typedef struct svc_private svc_private_t;
diff --git a/xlators/features/snapview-server/src/snapview-server-helpers.c b/xlators/features/snapview-server/src/snapview-server-helpers.c
index 6530901c5b8..62c1ddac49c 100644
--- a/xlators/features/snapview-server/src/snapview-server-helpers.c
+++ b/xlators/features/snapview-server/src/snapview-server-helpers.c
@@ -10,7 +10,7 @@
#include "snapview-server.h"
#include "snapview-server-mem-types.h"
-#include "xlator.h"
+#include <glusterfs/xlator.h>
#include "rpc-clnt.h"
#include "xdr-generic.h"
#include "protocol-common.h"
@@ -476,6 +476,7 @@ __svs_initialise_snapshot_volume(xlator_t *this, const char *name,
char logfile[PATH_MAX] = {
0,
};
+ char *volfile_server = NULL;
GF_VALIDATE_OR_GOTO("snapview-server", this, out);
GF_VALIDATE_OR_GOTO(this->name, this->private, out);
@@ -512,14 +513,50 @@ __svs_initialise_snapshot_volume(xlator_t *this, const char *name,
goto out;
}
- ret = glfs_set_volfile_server(fs, "tcp", "localhost", 24007);
+ /*
+ * Before, localhost was used as the volfile server. But, with that
+ * method, accessing snapshots started giving ENOENT error if a
+ * specific bind address is mentioned in the glusterd volume file.
+ * Check the bug https://bugzilla.redhat.com/show_bug.cgi?id=1725211.
+ * So, the new method is tried below, where, snapview-server first
+ * uses the volfile server used by the snapd (obtained from the
+ * command line arguments saved in the global context of the process).
+ * If the volfile server in global context is NULL, then localhost
+ * is tried (like before).
+ */
+ if (this->ctx->cmd_args.volfile_server) {
+ volfile_server = gf_strdup(this->ctx->cmd_args.volfile_server);
+ if (!volfile_server) {
+ gf_msg(this->name, GF_LOG_WARNING, ENOMEM,
+ SVS_MSG_VOLFILE_SERVER_GET_FAIL,
+ "failed to copy volfile server %s. ",
+ this->ctx->cmd_args.volfile_server);
+ ret = -1;
+ goto out;
+ }
+ } else {
+ gf_msg(this->name, GF_LOG_WARNING, 0, /* no allocation failed here, so no errno is reported */
+ SVS_MSG_VOLFILE_SERVER_GET_FAIL,
+ "volfile server is NULL in cmd args. "
+ "Trying with localhost");
+ volfile_server = gf_strdup("localhost");
+ if (!volfile_server) {
+ gf_msg(this->name, GF_LOG_WARNING, ENOMEM,
+ SVS_MSG_VOLFILE_SERVER_GET_FAIL,
+ "failed to copy volfile server localhost.");
+ ret = -1;
+ goto out;
+ }
+ }
+
+ ret = glfs_set_volfile_server(fs, "tcp", volfile_server, 24007);
if (ret) {
gf_msg(this->name, GF_LOG_ERROR, local_errno,
SVS_MSG_SET_VOLFILE_SERVR_FAILED,
"setting the "
- "volfile server for snap volume %s "
+ "volfile server %s for snap volume %s "
"failed",
- dirent->name);
+ volfile_server, dirent->name);
goto out;
}
@@ -561,6 +598,7 @@ out:
dirent->fs = fs;
}
+ GF_FREE(volfile_server);
return fs;
}
diff --git a/xlators/features/snapview-server/src/snapview-server-mem-types.h b/xlators/features/snapview-server/src/snapview-server-mem-types.h
index 504c7969bdc..63456b85323 100644
--- a/xlators/features/snapview-server/src/snapview-server-mem-types.h
+++ b/xlators/features/snapview-server/src/snapview-server-mem-types.h
@@ -11,7 +11,7 @@
#ifndef __SNAP_VIEW_MEM_TYPES_H
#define __SNAP_VIEW_MEM_TYPES_H
-#include "mem-types.h"
+#include <glusterfs/mem-types.h>
enum snapview_mem_types {
gf_svs_mt_priv_t = gf_common_mt_end + 1,
diff --git a/xlators/features/snapview-server/src/snapview-server-messages.h b/xlators/features/snapview-server/src/snapview-server-messages.h
index 44cc1575405..f634ab5d2b0 100644
--- a/xlators/features/snapview-server/src/snapview-server-messages.h
+++ b/xlators/features/snapview-server/src/snapview-server-messages.h
@@ -11,7 +11,7 @@
#ifndef _SNAPVIEW_SERVER_MESSAGES_H_
#define _SNAPVIEW_SERVER_MESSAGES_H_
-#include "glfs-message-id.h"
+#include <glusterfs/glfs-message-id.h>
/* To add new message IDs, append new identifiers at the end of the list.
*
@@ -49,6 +49,6 @@ GLFS_MSGID(SNAPVIEW_SERVER, SVS_MSG_NO_MEMORY, SVS_MSG_MEM_ACNT_FAILED,
SVS_MSG_CLOSEDIR_FAILED, SVS_MSG_CLOSE_FAILED,
SVS_MSG_GFID_GEN_FAILED, SVS_MSG_GLFS_NEW_FAILED,
SVS_MSG_SET_VOLFILE_SERVR_FAILED, SVS_MSG_SET_LOGGING_FAILED,
- SVS_MSG_GLFS_INIT_FAILED);
+ SVS_MSG_VOLFILE_SERVER_GET_FAIL, SVS_MSG_GLFS_INIT_FAILED);
#endif /* !_SNAPVIEW_CLIENT_MESSAGES_H_ */
diff --git a/xlators/features/snapview-server/src/snapview-server-mgmt.c b/xlators/features/snapview-server/src/snapview-server-mgmt.c
index 8e23b9adb2e..ecf31c3b880 100644
--- a/xlators/features/snapview-server/src/snapview-server-mgmt.c
+++ b/xlators/features/snapview-server/src/snapview-server-mgmt.c
@@ -26,11 +26,11 @@ mgmt_cbk_snap(struct rpc_clnt *rpc, void *mydata, void *data)
return 0;
}
-rpcclnt_cb_actor_t svs_cbk_actors[GF_CBK_MAXVALUE] = {
- [GF_CBK_GET_SNAPS] = {"GETSNAPS", GF_CBK_GET_SNAPS, mgmt_cbk_snap},
+static rpcclnt_cb_actor_t svs_cbk_actors[GF_CBK_MAXVALUE] = {
+ [GF_CBK_GET_SNAPS] = {"GETSNAPS", mgmt_cbk_snap, GF_CBK_GET_SNAPS},
};
-struct rpcclnt_cb_program svs_cbk_prog = {
+static struct rpcclnt_cb_program svs_cbk_prog = {
.progname = "GlusterFS Callback",
.prognum = GLUSTER_CBK_PROGRAM,
.progver = GLUSTER_CBK_VERSION,
@@ -38,12 +38,12 @@ struct rpcclnt_cb_program svs_cbk_prog = {
.numactors = GF_CBK_MAXVALUE,
};
-char *clnt_handshake_procs[GF_HNDSK_MAXVALUE] = {
+static char *clnt_handshake_procs[GF_HNDSK_MAXVALUE] = {
[GF_HNDSK_NULL] = "NULL",
[GF_HNDSK_EVENT_NOTIFY] = "EVENTNOTIFY",
};
-rpc_clnt_prog_t svs_clnt_handshake_prog = {
+static rpc_clnt_prog_t svs_clnt_handshake_prog = {
.progname = "GlusterFS Handshake",
.prognum = GLUSTER_HNDSK_PROGRAM,
.progver = GLUSTER_HNDSK_VERSION,
@@ -86,6 +86,7 @@ svs_mgmt_init(xlator_t *this)
char *host = NULL;
cmd_args_t *cmd_args = NULL;
glusterfs_ctx_t *ctx = NULL;
+ xlator_cmdline_option_t *opt = NULL;
GF_VALIDATE_OR_GOTO("snapview-server", this, out);
GF_VALIDATE_OR_GOTO(this->name, this->private, out);
@@ -100,7 +101,13 @@ svs_mgmt_init(xlator_t *this)
if (cmd_args->volfile_server)
host = cmd_args->volfile_server;
- ret = rpc_transport_inet_options_build(&options, host, port);
+ options = dict_new();
+ if (!options)
+ goto out;
+
+ opt = find_xlator_option_in_cmd_args_t("address-family", cmd_args);
+ ret = rpc_transport_inet_options_build(options, host, port,
+ (opt != NULL ? opt->value : NULL));
if (ret) {
gf_msg(this->name, GF_LOG_ERROR, 0, SVS_MSG_BUILD_TRNSPRT_OPT_FAILED,
"failed to build the "
@@ -142,6 +149,8 @@ svs_mgmt_init(xlator_t *this)
gf_msg_debug(this->name, 0, "svs mgmt init successful");
out:
+ if (options)
+ dict_unref(options);
if (ret)
if (priv) {
rpc_clnt_connection_cleanup(&priv->rpc->conn);
@@ -228,7 +237,8 @@ mgmt_get_snapinfo_cbk(struct rpc_req *req, struct iovec *iov, int count,
glusterfs_ctx_t *ctx = NULL;
int ret = -1;
dict_t *dict = NULL;
- char key[1024] = {0};
+ char key[32] = {0};
+ int len;
int snapcount = 0;
svs_private_t *priv = NULL;
xlator_t *this = NULL;
@@ -247,7 +257,6 @@ mgmt_get_snapinfo_cbk(struct rpc_req *req, struct iovec *iov, int count,
this = frame->this;
ctx = frame->this->ctx;
priv = this->private;
- old_dirents = priv->dirents;
if (!ctx) {
errno = EINVAL;
@@ -322,8 +331,8 @@ mgmt_get_snapinfo_cbk(struct rpc_req *req, struct iovec *iov, int count,
}
for (i = 0; i < snapcount; i++) {
- snprintf(key, sizeof(key), "snap-volname.%d", i + 1);
- ret = dict_get_str(dict, key, &value);
+ len = snprintf(key, sizeof(key), "snap-volname.%d", i + 1);
+ ret = dict_get_strn(dict, key, len, &value);
if (ret) {
errno = EINVAL;
ret = -1;
@@ -335,8 +344,8 @@ mgmt_get_snapinfo_cbk(struct rpc_req *req, struct iovec *iov, int count,
strncpy(dirents[i].snap_volname, value,
sizeof(dirents[i].snap_volname));
- snprintf(key, sizeof(key), "snap-id.%d", i + 1);
- ret = dict_get_str(dict, key, &value);
+ len = snprintf(key, sizeof(key), "snap-id.%d", i + 1);
+ ret = dict_get_strn(dict, key, len, &value);
if (ret) {
errno = EINVAL;
ret = -1;
@@ -346,8 +355,8 @@ mgmt_get_snapinfo_cbk(struct rpc_req *req, struct iovec *iov, int count,
}
strncpy(dirents[i].uuid, value, sizeof(dirents[i].uuid));
- snprintf(key, sizeof(key), "snapname.%d", i + 1);
- ret = dict_get_str(dict, key, &value);
+ len = snprintf(key, sizeof(key), "snapname.%d", i + 1);
+ ret = dict_get_strn(dict, key, len, &value);
if (ret) {
errno = EINVAL;
ret = -1;
@@ -379,6 +388,7 @@ mgmt_get_snapinfo_cbk(struct rpc_req *req, struct iovec *iov, int count,
LOCK(&priv->snaplist_lock);
{
oldcount = priv->num_snaps;
+ old_dirents = priv->dirents;
for (i = 0; i < priv->num_snaps; i++) {
for (j = 0; j < snapcount; j++) {
if ((!strcmp(old_dirents[i].name, dirents[j].name)) &&
@@ -398,7 +408,13 @@ mgmt_get_snapinfo_cbk(struct rpc_req *req, struct iovec *iov, int count,
if (old_dirents) {
for (i = 0; i < oldcount; i++) {
- if (old_dirents[i].fs)
- glfs_fini(old_dirents[i].fs);
+ if (old_dirents[i].fs) { /* braces keep glfs_fini under the NULL check, alongside the debug log */
+ gf_msg_debug(this->name, 0,
+ "calling glfs_fini on "
+ "name: %s, snap_volname: %s, uuid: %s",
+ old_dirents[i].name, old_dirents[i].snap_volname,
+ old_dirents[i].uuid);
+ glfs_fini(old_dirents[i].fs);
+ }
}
}
diff --git a/xlators/features/snapview-server/src/snapview-server.c b/xlators/features/snapview-server/src/snapview-server.c
index 7a8e29ecdfc..76cccae5914 100644
--- a/xlators/features/snapview-server/src/snapview-server.c
+++ b/xlators/features/snapview-server/src/snapview-server.c
@@ -9,15 +9,17 @@
*/
#include "snapview-server.h"
#include "snapview-server-mem-types.h"
-#include "compat-errno.h"
+#include <glusterfs/compat-errno.h>
-#include "xlator.h"
+#include <glusterfs/xlator.h>
#include "rpc-clnt.h"
#include "xdr-generic.h"
#include "protocol-common.h"
-#include "syscall.h"
+#include <glusterfs/syscall.h>
#include <pthread.h>
+#include "glfs-internal.h"
+
int
gf_setcredentials(uid_t *uid, gid_t *gid, uint16_t ngrps, uint32_t *groups)
{
@@ -826,7 +828,8 @@ out:
* back into the dict. But to get the values for those xattrs it has to do the
* getxattr operation on each xattr which might turn out to be a costly
* operation. So for each of the xattrs present in the list, a 0 byte value
- * ("") is set into the dict before unwinding. This can be treated as an
+ * ("") is set into the dict before unwinding. Since ("") is also a valid xattr
+ * value(in a file system) we use an extra key in the same dictionary as an
* indicator to other xlators which want to cache the xattrs (as of now,
* md-cache which caches acl and selinux related xattrs) to not to cache the
* values of the xattrs present in the dict.
@@ -869,6 +872,15 @@ svs_add_xattrs_to_dict(xlator_t *this, dict_t *dict, char *list, ssize_t size)
list_offset += strlen(keybuffer) + 1;
} /* while (remaining_size > 0) */
+ /* Add an additional key to indicate that we don't need to cache these
+ * xattrs(with value "") */
+ ret = dict_set_str(dict, "glusterfs.skip-cache", "");
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, SVS_MSG_DICT_SET_FAILED,
+ "dict set operation for the key glusterfs.skip-cache failed.");
+ goto out;
+ }
+
ret = 0;
out:
@@ -995,8 +1007,8 @@ svs_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, const char *name,
op_errno = ENOMEM;
gf_msg(this->name, GF_LOG_ERROR, op_errno, SVS_MSG_NO_MEMORY,
"failed to add xattrs from the list to "
- "dict for %s (gfid: %s, key: %s)",
- loc->path, uuid_utoa(loc->inode->gfid), name);
+ "dict for %s (gfid: %s)",
+ loc->path, uuid_utoa(loc->inode->gfid));
goto out;
}
GF_FREE(value);
@@ -1177,8 +1189,8 @@ svs_fgetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, const char *name,
op_errno = ENOMEM;
gf_msg(this->name, GF_LOG_ERROR, op_errno, SVS_MSG_NO_MEMORY,
"failed to add xattrs from the list "
- "to dict (gfid: %s, key: %s)",
- uuid_utoa(fd->inode->gfid), name);
+ "to dict (gfid: %s)",
+ uuid_utoa(fd->inode->gfid));
goto out;
}
GF_FREE(value);
@@ -2000,7 +2012,9 @@ svs_stat(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
"failed",
loc->name, uuid_utoa(loc->inode->gfid));
goto out;
- }
+ } else
+ gf_msg_debug(this->name, 0, "stat on %s (%s) successful", loc->path,
+ uuid_utoa(loc->inode->gfid));
iatt_from_stat(&buf, &stat);
gf_uuid_copy(buf.ia_gfid, loc->inode->gfid);
@@ -2256,7 +2270,7 @@ svs_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
};
svs_fd_t *sfd = NULL;
int ret = -1;
- struct stat fstatbuf = {
+ struct glfs_stat fstatbuf = {
0,
};
glfs_fd_t *glfd = NULL;
@@ -2333,7 +2347,7 @@ svs_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
iobref = iobref_new();
iobref_add(iobref, iobuf);
- iatt_from_stat(&stbuf, &fstatbuf);
+ glfs_iatt_from_statx(&stbuf, &fstatbuf);
gf_uuid_copy(stbuf.ia_gfid, fd->inode->gfid);
svs_fill_ino_from_gfid(&stbuf);
@@ -2691,3 +2705,16 @@ struct volume_options options[] = {
},
{.key = {NULL}},
};
+
+xlator_api_t xlator_api = { /* snapview-server's table of entry points; no .reconfigure handler is set */
+ .init = init,
+ .fini = fini,
+ .notify = notify,
+ .mem_acct_init = mem_acct_init,
+ .op_version = {1},
+ .fops = &fops,
+ .cbks = &cbks,
+ .options = options,
+ .identifier = "snapview-server",
+ .category = GF_MAINTAINED,
+};
diff --git a/xlators/features/snapview-server/src/snapview-server.h b/xlators/features/snapview-server/src/snapview-server.h
index 6951d31449f..6472422e715 100644
--- a/xlators/features/snapview-server/src/snapview-server.h
+++ b/xlators/features/snapview-server/src/snapview-server.h
@@ -10,26 +10,24 @@
#ifndef __SNAP_VIEW_H__
#define __SNAP_VIEW_H__
-#include "dict.h"
-#include "defaults.h"
-#include "mem-types.h"
-#include "call-stub.h"
-#include "byte-order.h"
-#include "iatt.h"
+#include <glusterfs/dict.h>
+#include <glusterfs/defaults.h>
+#include <glusterfs/mem-types.h>
+#include <glusterfs/call-stub.h>
+#include <glusterfs/byte-order.h>
+#include <glusterfs/iatt.h>
#include <ctype.h>
#include <sys/uio.h>
-#include "glusterfs.h"
-#include "xlator.h"
-#include "logging.h"
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/logging.h>
#include "glfs.h"
-#include "common-utils.h"
#include "glfs-handles.h"
#include "glfs-internal.h"
#include "glusterfs3-xdr.h"
-#include "glusterfs-acl.h"
-#include "syncop.h"
-#include "list.h"
-#include "timer.h"
+#include <glusterfs/glusterfs-acl.h>
+#include <glusterfs/syncop.h>
+#include <glusterfs/list.h>
+#include <glusterfs/timer.h>
#include "rpc-clnt.h"
#include "protocol-common.h"
#include "xdr-generic.h"
@@ -58,9 +56,16 @@
{ \
for (i = 0; i < _private->num_snaps; i++) { \
tmp_fs = _private->dirents[i].fs; \
- gf_log(this->name, GF_LOG_DEBUG, "dirent->fs: %p", tmp_fs); \
+ gf_log(this->name, GF_LOG_DEBUG, \
+ "snap name: %s, snap volume: %s," \
+ "dirent->fs: %p", \
+ _private->dirents[i].name, \
+ _private->dirents[i].snap_volname, tmp_fs); \
if (tmp_fs && fs && (tmp_fs == fs)) { \
found = _gf_true; \
+ gf_msg_debug(this->name, 0, \
+ "found the fs " \
+ "instance"); \
break; \
} \
} \
diff --git a/xlators/features/thin-arbiter/src/Makefile.am b/xlators/features/thin-arbiter/src/Makefile.am
index 7fd31a66caa..a3c133e7798 100644
--- a/xlators/features/thin-arbiter/src/Makefile.am
+++ b/xlators/features/thin-arbiter/src/Makefile.am
@@ -1,6 +1,4 @@
-if WITH_SERVER
xlator_LTLIBRARIES = thin-arbiter.la
-endif
xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features
diff --git a/xlators/features/thin-arbiter/src/thin-arbiter-mem-types.h b/xlators/features/thin-arbiter/src/thin-arbiter-mem-types.h
index 79b5ce0eee3..69562d2febc 100644
--- a/xlators/features/thin-arbiter/src/thin-arbiter-mem-types.h
+++ b/xlators/features/thin-arbiter/src/thin-arbiter-mem-types.h
@@ -9,7 +9,7 @@
#ifndef __THIN_ARBITER_MEM_TYPES_H__
#define __THIN_ARBITER_MEM_TYPES_H__
-#include "mem-types.h"
+#include <glusterfs/mem-types.h>
typedef enum gf_ta_mem_types_ {
gf_ta_mt_local_t = gf_common_mt_end + 1,
diff --git a/xlators/features/thin-arbiter/src/thin-arbiter-messages.h b/xlators/features/thin-arbiter/src/thin-arbiter-messages.h
index f49b3eedadf..81d7491577a 100644
--- a/xlators/features/thin-arbiter/src/thin-arbiter-messages.h
+++ b/xlators/features/thin-arbiter/src/thin-arbiter-messages.h
@@ -11,7 +11,7 @@
#ifndef _TA_MESSAGES_H_
#define _TA_MESSAGES_H_
-#include "glfs-message-id.h"
+#include <glusterfs/glfs-message-id.h>
/* To add new message IDs, append new identifiers at the end of the list.
*
diff --git a/xlators/features/thin-arbiter/src/thin-arbiter.c b/xlators/features/thin-arbiter/src/thin-arbiter.c
index 062e04132d6..ce3008636f1 100644
--- a/xlators/features/thin-arbiter/src/thin-arbiter.c
+++ b/xlators/features/thin-arbiter/src/thin-arbiter.c
@@ -11,11 +11,11 @@
#include "thin-arbiter.h"
#include "thin-arbiter-messages.h"
#include "thin-arbiter-mem-types.h"
-#include "glusterfs.h"
-#include "xlator.h"
-#include "logging.h"
-#include "byte-order.h"
-#include "common-utils.h"
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/xlator.h>
+#include <glusterfs/logging.h>
+#include <glusterfs/byte-order.h>
+#include <glusterfs/common-utils.h>
int
ta_set_incoming_values(dict_t *dict, char *key, data_t *value, void *data)
@@ -646,3 +646,16 @@ struct xlator_cbks cbks = {};
struct volume_options options[] = {
{.key = {NULL}},
};
+
+xlator_api_t xlator_api = {
+ .init = init,
+ .fini = fini,
+ .reconfigure = reconfigure,
+ .mem_acct_init = mem_acct_init,
+ .op_version = {GD_OP_VERSION_6_0},
+ .fops = &fops,
+ .cbks = &cbks,
+ .options = options,
+ .identifier = "thin-arbiter",
+ .category = GF_MAINTAINED,
+};
diff --git a/xlators/features/thin-arbiter/src/thin-arbiter.h b/xlators/features/thin-arbiter/src/thin-arbiter.h
index af3d4b1af92..e5f914b84bf 100644
--- a/xlators/features/thin-arbiter/src/thin-arbiter.h
+++ b/xlators/features/thin-arbiter/src/thin-arbiter.h
@@ -11,12 +11,12 @@
#ifndef _THIN_ARBITER_H
#define _THIN_ARBITER_H
-#include "locking.h"
-#include "common-utils.h"
-#include "glusterfs.h"
-#include "xlator.h"
-#include "defaults.h"
-#include "list.h"
+#include <glusterfs/locking.h>
+#include <glusterfs/common-utils.h>
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/xlator.h>
+#include <glusterfs/defaults.h>
+#include <glusterfs/list.h>
#define THIN_ARBITER_SOURCE_XATTR "trusted.ta.source"
#define THIN_ARBITER_SOURCE_SIZE 2
diff --git a/xlators/features/trash/src/trash-mem-types.h b/xlators/features/trash/src/trash-mem-types.h
index 133f2edf99b..43353c8f095 100644
--- a/xlators/features/trash/src/trash-mem-types.h
+++ b/xlators/features/trash/src/trash-mem-types.h
@@ -10,7 +10,7 @@
#ifndef __TRASH_MEM_TYPES_H__
#define __TRASH_MEM_TYPES_H__
-#include "mem-types.h"
+#include <glusterfs/mem-types.h>
enum gf_trash_mem_types_ {
gf_trash_mt_trash_private_t = gf_common_mt_end + 1,
diff --git a/xlators/features/trash/src/trash.c b/xlators/features/trash/src/trash.c
index aa546d3394d..7d09cba3e9c 100644
--- a/xlators/features/trash/src/trash.c
+++ b/xlators/features/trash/src/trash.c
@@ -9,7 +9,7 @@
*/
#include "trash.h"
#include "trash-mem-types.h"
-#include "syscall.h"
+#include <glusterfs/syscall.h>
#define root_gfid \
(uuid_t) { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 }
@@ -170,7 +170,7 @@ store_eliminate_path(char *str, trash_elim_path **eliminate)
int ret = 0;
char *strtokptr = NULL;
- if (eliminate == NULL) {
+ if ((str == NULL) || (eliminate == NULL)) {
ret = EINVAL;
goto out;
}
@@ -212,11 +212,11 @@ void
append_time_stamp(char *name, size_t name_size)
{
int i;
- char timestr[64] = {
+ char timestr[GF_TIMESTR_SIZE] = {
0,
};
- gf_time_fmt(timestr, sizeof(timestr), time(NULL), gf_timefmt_F_HMS);
+ gf_time_fmt(timestr, sizeof(timestr), gf_time(), gf_timefmt_F_HMS);
/* removing white spaces in timestamp */
for (i = 0; i < strlen(timestr); i++) {
@@ -2523,6 +2523,7 @@ out:
GF_FREE(priv);
}
mem_pool_destroy(this->local_pool);
+ this->local_pool = NULL;
}
return ret;
}
@@ -2636,3 +2637,17 @@ struct volume_options options[] = {
.default_value = "{{ brick.path }}"},
{.key = {NULL}},
};
+
+xlator_api_t xlator_api = {
+ .init = init,
+ .fini = fini,
+ .notify = notify,
+ .reconfigure = reconfigure,
+ .mem_acct_init = mem_acct_init,
+ .op_version = {1}, /* Present from the initial version */
+ .fops = &fops,
+ .cbks = &cbks,
+ .options = options,
+ .identifier = "trash",
+ .category = GF_TECH_PREVIEW,
+};
diff --git a/xlators/features/trash/src/trash.h b/xlators/features/trash/src/trash.h
index 675f9f97350..6671617c2c6 100644
--- a/xlators/features/trash/src/trash.h
+++ b/xlators/features/trash/src/trash.h
@@ -10,11 +10,11 @@
#ifndef __TRASH_H__
#define __TRASH_H__
-#include "glusterfs.h"
-#include "logging.h"
-#include "dict.h"
-#include "xlator.h"
-#include "defaults.h"
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/logging.h>
+#include <glusterfs/dict.h>
+#include <glusterfs/xlator.h>
+#include <glusterfs/defaults.h>
#include "inode.c"
#include "fnmatch.h"
diff --git a/xlators/features/upcall/src/upcall-cache-invalidation.h b/xlators/features/upcall/src/upcall-cache-invalidation.h
index e509a89acd5..db649b2c9a6 100644
--- a/xlators/features/upcall/src/upcall-cache-invalidation.h
+++ b/xlators/features/upcall/src/upcall-cache-invalidation.h
@@ -15,10 +15,4 @@
* events post its last access */
#define CACHE_INVALIDATION_TIMEOUT "60"
-/* xlator options */
-gf_boolean_t
-is_cache_invalidation_enabled(xlator_t *this);
-int32_t
-get_cache_invalidation_timeout(xlator_t *this);
-
#endif /* __UPCALL_CACHE_INVALIDATION_H__ */
diff --git a/xlators/features/upcall/src/upcall-internal.c b/xlators/features/upcall/src/upcall-internal.c
index 9d16e5f0ef8..c641bd6f432 100644
--- a/xlators/features/upcall/src/upcall-internal.c
+++ b/xlators/features/upcall/src/upcall-internal.c
@@ -12,20 +12,20 @@
#include <fcntl.h>
#include <limits.h>
-#include "glusterfs.h"
-#include "compat.h"
-#include "xlator.h"
-#include "logging.h"
-#include "common-utils.h"
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/compat.h>
+#include <glusterfs/xlator.h>
+#include <glusterfs/logging.h>
+#include <glusterfs/common-utils.h>
-#include "statedump.h"
-#include "syncop.h"
+#include <glusterfs/statedump.h>
+#include <glusterfs/syncop.h>
#include "upcall.h"
#include "upcall-mem-types.h"
#include "glusterfs3-xdr.h"
#include "protocol-common.h"
-#include "defaults.h"
+#include <glusterfs/defaults.h>
/*
* Check if any of the upcall options are enabled:
@@ -35,62 +35,37 @@ gf_boolean_t
is_upcall_enabled(xlator_t *this)
{
upcall_private_t *priv = NULL;
- gf_boolean_t is_enabled = _gf_false;
if (this->private) {
priv = (upcall_private_t *)this->private;
-
- if (priv->cache_invalidation_enabled) {
- is_enabled = _gf_true;
- }
+ return priv->cache_invalidation_enabled;
}
- return is_enabled;
+ return _gf_false;
}
/*
* Get the cache_invalidation_timeout
*/
-int32_t
+static int32_t
get_cache_invalidation_timeout(xlator_t *this)
{
upcall_private_t *priv = NULL;
- int32_t timeout = 0;
if (this->private) {
priv = (upcall_private_t *)this->private;
- timeout = priv->cache_invalidation_timeout;
- }
-
- return timeout;
-}
-
-/*
- * Allocate and add a new client entry to the given upcall entry
- */
-upcall_client_t *
-add_upcall_client(call_frame_t *frame, client_t *client,
- upcall_inode_ctx_t *up_inode_ctx)
-{
- upcall_client_t *up_client_entry = NULL;
-
- pthread_mutex_lock(&up_inode_ctx->client_list_lock);
- {
- up_client_entry = __add_upcall_client(frame, client, up_inode_ctx);
+ return priv->cache_invalidation_timeout;
}
- pthread_mutex_unlock(&up_inode_ctx->client_list_lock);
- return up_client_entry;
+ return 0;
}
-upcall_client_t *
+static upcall_client_t *
__add_upcall_client(call_frame_t *frame, client_t *client,
- upcall_inode_ctx_t *up_inode_ctx)
+ upcall_inode_ctx_t *up_inode_ctx, time_t now)
{
- upcall_client_t *up_client_entry = NULL;
-
- up_client_entry = GF_CALLOC(1, sizeof(*up_client_entry),
- gf_upcall_mt_upcall_client_entry_t);
+ upcall_client_t *up_client_entry = GF_MALLOC(
+ sizeof(*up_client_entry), gf_upcall_mt_upcall_client_entry_t);
if (!up_client_entry) {
gf_msg("upcall", GF_LOG_WARNING, 0, UPCALL_MSG_NO_MEMORY,
"Memory allocation failed");
@@ -98,7 +73,7 @@ __add_upcall_client(call_frame_t *frame, client_t *client,
}
INIT_LIST_HEAD(&up_client_entry->client_list);
up_client_entry->client_uid = gf_strdup(client->client_uid);
- up_client_entry->access_time = time(NULL);
+ up_client_entry->access_time = now;
up_client_entry->expire_time_attr = get_cache_invalidation_timeout(
frame->this);
@@ -110,39 +85,7 @@ __add_upcall_client(call_frame_t *frame, client_t *client,
return up_client_entry;
}
-/*
- * Given client->uid, retrieve the corresponding upcall client entry.
- * If none found, create a new entry.
- */
-upcall_client_t *
-__get_upcall_client(call_frame_t *frame, client_t *client,
- upcall_inode_ctx_t *up_inode_ctx)
-{
- upcall_client_t *up_client_entry = NULL;
- upcall_client_t *tmp = NULL;
- gf_boolean_t found_client = _gf_false;
-
- list_for_each_entry_safe(up_client_entry, tmp, &up_inode_ctx->client_list,
- client_list)
- {
- if (strcmp(client->client_uid, up_client_entry->client_uid) == 0) {
- /* found client entry. Update the access_time */
- up_client_entry->access_time = time(NULL);
- found_client = _gf_true;
- gf_log(THIS->name, GF_LOG_DEBUG, "upcall_entry_t client found - %s",
- up_client_entry->client_uid);
- break;
- }
- }
-
- if (!found_client) { /* create one */
- up_client_entry = __add_upcall_client(frame, client, up_inode_ctx);
- }
-
- return up_client_entry;
-}
-
-int
+static int
__upcall_inode_ctx_set(inode_t *inode, xlator_t *this)
{
upcall_inode_ctx_t *inode_ctx = NULL;
@@ -158,7 +101,7 @@ __upcall_inode_ctx_set(inode_t *inode, xlator_t *this)
if (!ret)
goto out;
- inode_ctx = GF_CALLOC(1, sizeof(upcall_inode_ctx_t),
+ inode_ctx = GF_MALLOC(sizeof(upcall_inode_ctx_t),
gf_upcall_mt_upcall_inode_ctx_t);
if (!inode_ctx) {
@@ -190,7 +133,7 @@ out:
return ret;
}
-upcall_inode_ctx_t *
+static upcall_inode_ctx_t *
__upcall_inode_ctx_get(inode_t *inode, xlator_t *this)
{
upcall_inode_ctx_t *inode_ctx = NULL;
@@ -229,8 +172,20 @@ upcall_inode_ctx_get(inode_t *inode, xlator_t *this)
return inode_ctx;
}
-int
-upcall_cleanup_expired_clients(xlator_t *this, upcall_inode_ctx_t *up_inode_ctx)
+static int
+__upcall_cleanup_client_entry(upcall_client_t *up_client)
+{
+ list_del_init(&up_client->client_list);
+
+ GF_FREE(up_client->client_uid);
+ GF_FREE(up_client);
+
+ return 0;
+}
+
+static int
+upcall_cleanup_expired_clients(xlator_t *this, upcall_inode_ctx_t *up_inode_ctx,
+ time_t now)
{
upcall_client_t *up_client = NULL;
upcall_client_t *tmp = NULL;
@@ -245,7 +200,7 @@ upcall_cleanup_expired_clients(xlator_t *this, upcall_inode_ctx_t *up_inode_ctx)
list_for_each_entry_safe(up_client, tmp, &up_inode_ctx->client_list,
client_list)
{
- t_expired = time(NULL) - up_client->access_time;
+ t_expired = now - up_client->access_time;
if (t_expired > (2 * timeout)) {
gf_log(THIS->name, GF_LOG_TRACE, "Cleaning up client_entry(%s)",
@@ -269,17 +224,6 @@ out:
return ret;
}
-int
-__upcall_cleanup_client_entry(upcall_client_t *up_client)
-{
- list_del_init(&up_client->client_list);
-
- GF_FREE(up_client->client_uid);
- GF_FREE(up_client);
-
- return 0;
-}
-
/*
* Free Upcall inode_ctx client list
*/
@@ -298,6 +242,10 @@ __upcall_cleanup_inode_ctx_client_list(upcall_inode_ctx_t *inode_ctx)
return 0;
}
+static void
+upcall_cache_forget(xlator_t *this, inode_t *inode,
+ upcall_inode_ctx_t *up_inode_ctx);
+
/*
* Free upcall_inode_ctx
*/
@@ -360,6 +308,7 @@ upcall_reaper_thread(void *data)
upcall_inode_ctx_t *tmp = NULL;
xlator_t *this = NULL;
time_t timeout = 0;
+ time_t time_now;
this = (xlator_t *)data;
GF_ASSERT(this);
@@ -367,33 +316,35 @@ upcall_reaper_thread(void *data)
priv = this->private;
GF_ASSERT(priv);
+ time_now = gf_time();
while (!priv->fini) {
list_for_each_entry_safe(inode_ctx, tmp, &priv->inode_ctx_list,
inode_ctx_list)
{
/* cleanup expired clients */
- upcall_cleanup_expired_clients(this, inode_ctx);
+ upcall_cleanup_expired_clients(this, inode_ctx, time_now);
if (!inode_ctx->destroy) {
continue;
}
+ /* client list would have been cleaned up */
+ gf_msg_debug("upcall", 0, "Freeing upcall_inode_ctx (%p)",
+ inode_ctx);
LOCK(&priv->inode_ctx_lk);
{
- /* client list would have been cleaned up*/
- gf_msg_debug("upcall", 0, "Freeing upcall_inode_ctx (%p)",
- inode_ctx);
list_del_init(&inode_ctx->inode_ctx_list);
pthread_mutex_destroy(&inode_ctx->client_list_lock);
- GF_FREE(inode_ctx);
- inode_ctx = NULL;
}
UNLOCK(&priv->inode_ctx_lk);
+ GF_FREE(inode_ctx);
+ inode_ctx = NULL;
}
/* don't do a very busy loop */
timeout = get_cache_invalidation_timeout(this);
sleep(timeout / 2);
+ time_now = gf_time();
}
return NULL;
@@ -486,6 +437,13 @@ up_filter_xattr(dict_t *xattr, dict_t *regd_xattrs)
return ret;
}
+static void
+upcall_client_cache_invalidate(xlator_t *this, uuid_t gfid,
+ upcall_client_t *up_client_entry, uint32_t flags,
+ struct iatt *stbuf, struct iatt *p_stbuf,
+ struct iatt *oldp_stbuf, dict_t *xattr,
+ time_t now);
+
gf_boolean_t
up_invalidate_needed(dict_t *xattrs)
{
@@ -520,6 +478,8 @@ upcall_cache_invalidate(call_frame_t *frame, xlator_t *this, client_t *client,
upcall_client_t *tmp = NULL;
upcall_inode_ctx_t *up_inode_ctx = NULL;
gf_boolean_t found = _gf_false;
+ time_t time_now;
+ inode_t *linked_inode = NULL;
if (!is_upcall_enabled(this))
return;
@@ -532,7 +492,20 @@ upcall_cache_invalidate(call_frame_t *frame, xlator_t *this, client_t *client,
return;
}
- if (inode)
+ /* For nameless LOOKUPs, the inode created is always
+ * invalid. Hence check whether an inode is already linked for the same
+ * gfid. If so, update the inode_ctx of that linked (valid) inode instead.
+ */
+ if (inode && (inode->ia_type == IA_INVAL) && stbuf) {
+ linked_inode = inode_find(inode->table, stbuf->ia_gfid);
+ if (linked_inode) {
+ gf_log("upcall", GF_LOG_DEBUG,
+ "upcall_inode_ctx_get of linked inode (%p)", inode);
+ up_inode_ctx = upcall_inode_ctx_get(linked_inode, this);
+ }
+ }
+
+ if (inode && !up_inode_ctx)
up_inode_ctx = upcall_inode_ctx_get(inode, this);
if (!up_inode_ctx) {
@@ -560,6 +533,7 @@ upcall_cache_invalidate(call_frame_t *frame, xlator_t *this, client_t *client,
goto out;
}
+ time_now = gf_time();
pthread_mutex_lock(&up_inode_ctx->client_list_lock);
{
list_for_each_entry_safe(up_client_entry, tmp,
@@ -567,7 +541,7 @@ upcall_cache_invalidate(call_frame_t *frame, xlator_t *this, client_t *client,
{
/* Do not send UPCALL event if same client. */
if (!strcmp(client->client_uid, up_client_entry->client_uid)) {
- up_client_entry->access_time = time(NULL);
+ up_client_entry->access_time = time_now;
found = _gf_true;
continue;
}
@@ -589,17 +563,21 @@ upcall_cache_invalidate(call_frame_t *frame, xlator_t *this, client_t *client,
* Also if the file is frequently accessed, set
* expire_time_attr to 0.
*/
- upcall_client_cache_invalidate(this, up_inode_ctx->gfid,
- up_client_entry, flags, stbuf,
- p_stbuf, oldp_stbuf, xattr);
+ upcall_client_cache_invalidate(
+ this, up_inode_ctx->gfid, up_client_entry, flags, stbuf,
+ p_stbuf, oldp_stbuf, xattr, time_now);
}
if (!found) {
- up_client_entry = __add_upcall_client(frame, client, up_inode_ctx);
+ up_client_entry = __add_upcall_client(frame, client, up_inode_ctx,
+ time_now);
}
}
pthread_mutex_unlock(&up_inode_ctx->client_list_lock);
out:
+ /* release the ref from inode_find */
+ if (linked_inode)
+ inode_unref(linked_inode);
return;
}
@@ -607,11 +585,12 @@ out:
* If the upcall_client_t has recently accessed the file (i.e, within
* priv->cache_invalidation_timeout), send a upcall notification.
*/
-void
+static void
upcall_client_cache_invalidate(xlator_t *this, uuid_t gfid,
upcall_client_t *up_client_entry, uint32_t flags,
struct iatt *stbuf, struct iatt *p_stbuf,
- struct iatt *oldp_stbuf, dict_t *xattr)
+ struct iatt *oldp_stbuf, dict_t *xattr,
+ time_t now)
{
struct gf_upcall up_req = {
0,
@@ -621,7 +600,7 @@ upcall_client_cache_invalidate(xlator_t *this, uuid_t gfid,
};
time_t timeout = 0;
int ret = -1;
- time_t t_expired = time(NULL) - up_client_entry->access_time;
+ time_t t_expired = now - up_client_entry->access_time;
GF_VALIDATE_OR_GOTO("upcall_client_cache_invalidate",
!(gf_uuid_is_null(gfid)), out);
@@ -678,32 +657,32 @@ out:
* Send "UP_FORGET" to all the clients so that they invalidate their cache
* entry and do a fresh lookup next time when any I/O comes in.
*/
-void
+static void
upcall_cache_forget(xlator_t *this, inode_t *inode,
upcall_inode_ctx_t *up_inode_ctx)
{
upcall_client_t *up_client_entry = NULL;
upcall_client_t *tmp = NULL;
- uint32_t flags = 0;
+ uint32_t flags = UP_FORGET;
+ time_t time_now;
if (!up_inode_ctx) {
return;
}
+ time_now = gf_time();
pthread_mutex_lock(&up_inode_ctx->client_list_lock);
{
list_for_each_entry_safe(up_client_entry, tmp,
&up_inode_ctx->client_list, client_list)
{
- flags = UP_FORGET;
-
- /* Set the access time to time(NULL)
+ /* Set the access time to gf_time()
* to send notify */
- up_client_entry->access_time = time(NULL);
+ up_client_entry->access_time = time_now;
upcall_client_cache_invalidate(this, up_inode_ctx->gfid,
up_client_entry, flags, NULL, NULL,
- NULL, NULL);
+ NULL, NULL, time_now);
}
}
pthread_mutex_unlock(&up_inode_ctx->client_list_lock);
diff --git a/xlators/features/upcall/src/upcall-mem-types.h b/xlators/features/upcall/src/upcall-mem-types.h
index 079677ff79c..f9883d9d72c 100644
--- a/xlators/features/upcall/src/upcall-mem-types.h
+++ b/xlators/features/upcall/src/upcall-mem-types.h
@@ -11,7 +11,7 @@
#ifndef __UPCALL_MEM_TYPES_H__
#define __UPCALL_MEM_TYPES_H__
-#include "mem-types.h"
+#include <glusterfs/mem-types.h>
enum gf_upcall_mem_types_ {
gf_upcall_mt_conf_t = gf_common_mt_end + 1,
diff --git a/xlators/features/upcall/src/upcall-messages.h b/xlators/features/upcall/src/upcall-messages.h
index db5cac1e07d..4095a34c200 100644
--- a/xlators/features/upcall/src/upcall-messages.h
+++ b/xlators/features/upcall/src/upcall-messages.h
@@ -11,7 +11,7 @@
#ifndef _UPCALL_MESSAGES_H_
#define _UPCALL_MESSAGES_H_
-#include "glfs-message-id.h"
+#include <glusterfs/glfs-message-id.h>
/* To add new message IDs, append new identifiers at the end of the list.
*
diff --git a/xlators/features/upcall/src/upcall.c b/xlators/features/upcall/src/upcall.c
index 5fdd4993003..0795f58059d 100644
--- a/xlators/features/upcall/src/upcall.c
+++ b/xlators/features/upcall/src/upcall.c
@@ -13,19 +13,19 @@
#include <limits.h>
#include <pthread.h>
-#include "glusterfs.h"
-#include "compat.h"
-#include "xlator.h"
-#include "logging.h"
-#include "common-utils.h"
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/compat.h>
+#include <glusterfs/xlator.h>
+#include <glusterfs/logging.h>
+#include <glusterfs/common-utils.h>
-#include "statedump.h"
+#include <glusterfs/statedump.h>
#include "upcall.h"
#include "upcall-mem-types.h"
#include "glusterfs3-xdr.h"
#include "protocol-common.h"
-#include "defaults.h"
+#include <glusterfs/defaults.h>
static int32_t
up_open_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
@@ -57,14 +57,13 @@ static int32_t
up_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
fd_t *fd, dict_t *xdata)
{
- int32_t op_errno = -1;
+ int32_t op_errno = ENOMEM;
upcall_local_t *local = NULL;
EXIT_IF_UPCALL_OFF(this, out);
local = upcall_local_init(frame, this, NULL, NULL, fd->inode, NULL);
if (!local) {
- op_errno = ENOMEM;
goto err;
}
@@ -111,14 +110,13 @@ up_writev(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *vector,
int count, off_t off, uint32_t flags, struct iobref *iobref,
dict_t *xdata)
{
- int32_t op_errno = -1;
+ int32_t op_errno = ENOMEM;
upcall_local_t *local = NULL;
EXIT_IF_UPCALL_OFF(this, out);
local = upcall_local_init(frame, this, NULL, NULL, fd->inode, NULL);
if (!local) {
- op_errno = ENOMEM;
goto err;
}
@@ -167,14 +165,13 @@ static int32_t
up_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
off_t offset, uint32_t flags, dict_t *xdata)
{
- int32_t op_errno = -1;
+ int32_t op_errno = ENOMEM;
upcall_local_t *local = NULL;
EXIT_IF_UPCALL_OFF(this, out);
local = upcall_local_init(frame, this, NULL, NULL, fd->inode, NULL);
if (!local) {
- op_errno = ENOMEM;
goto err;
}
@@ -220,14 +217,13 @@ static int32_t
up_lk(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd,
struct gf_flock *flock, dict_t *xdata)
{
- int32_t op_errno = -1;
+ int32_t op_errno = ENOMEM;
upcall_local_t *local = NULL;
EXIT_IF_UPCALL_OFF(this, out);
local = upcall_local_init(frame, this, NULL, NULL, fd->inode, NULL);
if (!local) {
- op_errno = ENOMEM;
goto err;
}
@@ -274,14 +270,13 @@ static int32_t
up_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset,
dict_t *xdata)
{
- int32_t op_errno = -1;
+ int32_t op_errno = ENOMEM;
upcall_local_t *local = NULL;
EXIT_IF_UPCALL_OFF(this, out);
local = upcall_local_init(frame, this, NULL, NULL, loc->inode, NULL);
if (!local) {
- op_errno = ENOMEM;
goto err;
}
@@ -343,14 +338,13 @@ static int32_t
up_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc, struct iatt *stbuf,
int32_t valid, dict_t *xdata)
{
- int32_t op_errno = -1;
+ int32_t op_errno = ENOMEM;
upcall_local_t *local = NULL;
EXIT_IF_UPCALL_OFF(this, out);
local = upcall_local_init(frame, this, NULL, NULL, loc->inode, NULL);
if (!local) {
- op_errno = ENOMEM;
goto err;
}
@@ -410,14 +404,13 @@ static int32_t
up_rename(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
dict_t *xdata)
{
- int32_t op_errno = -1;
+ int32_t op_errno = ENOMEM;
upcall_local_t *local = NULL;
EXIT_IF_UPCALL_OFF(this, out);
local = upcall_local_init(frame, this, newloc, NULL, oldloc->inode, NULL);
if (!local) {
- op_errno = ENOMEM;
goto err;
}
@@ -472,14 +465,13 @@ static int32_t
up_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag,
dict_t *xdata)
{
- int32_t op_errno = -1;
+ int32_t op_errno = ENOMEM;
upcall_local_t *local = NULL;
EXIT_IF_UPCALL_OFF(this, out);
local = upcall_local_init(frame, this, loc, NULL, loc->inode, NULL);
if (!local) {
- op_errno = ENOMEM;
goto err;
}
@@ -531,14 +523,13 @@ static int32_t
up_link(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
dict_t *xdata)
{
- int32_t op_errno = -1;
+ int32_t op_errno = ENOMEM;
upcall_local_t *local = NULL;
EXIT_IF_UPCALL_OFF(this, out);
local = upcall_local_init(frame, this, newloc, NULL, oldloc->inode, NULL);
if (!local) {
- op_errno = ENOMEM;
goto err;
}
@@ -592,14 +583,13 @@ static int32_t
up_rmdir(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags,
dict_t *xdata)
{
- int32_t op_errno = -1;
+ int32_t op_errno = ENOMEM;
upcall_local_t *local = NULL;
EXIT_IF_UPCALL_OFF(this, out);
local = upcall_local_init(frame, this, loc, NULL, loc->inode, NULL);
if (!local) {
- op_errno = ENOMEM;
goto err;
}
@@ -653,14 +643,13 @@ static int32_t
up_mkdir(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
mode_t umask, dict_t *params)
{
- int32_t op_errno = -1;
+ int32_t op_errno = ENOMEM;
upcall_local_t *local = NULL;
EXIT_IF_UPCALL_OFF(this, out);
local = upcall_local_init(frame, this, loc, NULL, loc->parent, NULL);
if (!local) {
- op_errno = ENOMEM;
goto err;
}
@@ -717,15 +706,13 @@ static int32_t
up_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
mode_t mode, mode_t umask, fd_t *fd, dict_t *params)
{
- int32_t op_errno = -1;
+ int32_t op_errno = ENOMEM;
upcall_local_t *local = NULL;
EXIT_IF_UPCALL_OFF(this, out);
local = upcall_local_init(frame, this, loc, NULL, loc->parent, NULL);
-
if (!local) {
- op_errno = ENOMEM;
goto err;
}
@@ -774,14 +761,13 @@ out:
static int32_t
up_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr_req)
{
- int32_t op_errno = -1;
+ int32_t op_errno = ENOMEM;
upcall_local_t *local = NULL;
EXIT_IF_UPCALL_OFF(this, out);
local = upcall_local_init(frame, this, NULL, NULL, loc->inode, NULL);
if (!local) {
- op_errno = ENOMEM;
goto err;
}
@@ -826,14 +812,13 @@ out:
static int32_t
up_stat(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
{
- int32_t op_errno = -1;
+ int32_t op_errno = ENOMEM;
upcall_local_t *local = NULL;
EXIT_IF_UPCALL_OFF(this, out);
local = upcall_local_init(frame, this, NULL, NULL, loc->inode, NULL);
if (!local) {
- op_errno = ENOMEM;
goto err;
}
@@ -852,14 +837,13 @@ err:
static int32_t
up_fstat(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
{
- int32_t op_errno = -1;
+ int32_t op_errno = ENOMEM;
upcall_local_t *local = NULL;
EXIT_IF_UPCALL_OFF(this, out);
local = upcall_local_init(frame, this, NULL, NULL, fd->inode, NULL);
if (!local) {
- op_errno = ENOMEM;
goto err;
}
@@ -879,14 +863,13 @@ static int32_t
up_ftruncate(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
dict_t *xdata)
{
- int32_t op_errno = -1;
+ int32_t op_errno = ENOMEM;
upcall_local_t *local = NULL;
EXIT_IF_UPCALL_OFF(this, out);
local = upcall_local_init(frame, this, NULL, NULL, fd->inode, NULL);
if (!local) {
- op_errno = ENOMEM;
goto err;
}
@@ -932,14 +915,13 @@ static int32_t
up_access(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t mask,
dict_t *xdata)
{
- int32_t op_errno = -1;
+ int32_t op_errno = ENOMEM;
upcall_local_t *local = NULL;
EXIT_IF_UPCALL_OFF(this, out);
local = upcall_local_init(frame, this, NULL, NULL, loc->inode, NULL);
if (!local) {
- op_errno = ENOMEM;
goto err;
}
@@ -986,14 +968,13 @@ static int32_t
up_readlink(call_frame_t *frame, xlator_t *this, loc_t *loc, size_t size,
dict_t *xdata)
{
- int32_t op_errno = -1;
+ int32_t op_errno = ENOMEM;
upcall_local_t *local = NULL;
EXIT_IF_UPCALL_OFF(this, out);
local = upcall_local_init(frame, this, NULL, NULL, loc->inode, NULL);
if (!local) {
- op_errno = ENOMEM;
goto err;
}
@@ -1047,14 +1028,13 @@ static int32_t
up_mknod(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
dev_t rdev, mode_t umask, dict_t *xdata)
{
- int32_t op_errno = -1;
+ int32_t op_errno = ENOMEM;
upcall_local_t *local = NULL;
EXIT_IF_UPCALL_OFF(this, out);
local = upcall_local_init(frame, this, loc, NULL, loc->parent, NULL);
if (!local) {
- op_errno = ENOMEM;
goto err;
}
@@ -1110,14 +1090,13 @@ static int32_t
up_symlink(call_frame_t *frame, xlator_t *this, const char *linkpath,
loc_t *loc, mode_t umask, dict_t *xdata)
{
- int32_t op_errno = -1;
+ int32_t op_errno = ENOMEM;
upcall_local_t *local = NULL;
EXIT_IF_UPCALL_OFF(this, out);
local = upcall_local_init(frame, this, loc, NULL, loc->parent, NULL);
if (!local) {
- op_errno = ENOMEM;
goto err;
}
@@ -1164,14 +1143,13 @@ static int32_t
up_opendir(call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd,
dict_t *xdata)
{
- int32_t op_errno = -1;
+ int32_t op_errno = ENOMEM;
upcall_local_t *local = NULL;
EXIT_IF_UPCALL_OFF(this, out);
local = upcall_local_init(frame, this, NULL, NULL, loc->inode, NULL);
if (!local) {
- op_errno = ENOMEM;
goto err;
}
@@ -1216,14 +1194,13 @@ out:
static int32_t
up_statfs(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
{
- int32_t op_errno = -1;
+ int32_t op_errno = ENOMEM;
upcall_local_t *local = NULL;
EXIT_IF_UPCALL_OFF(this, out);
local = upcall_local_init(frame, this, NULL, NULL, loc->inode, NULL);
if (!local) {
- op_errno = ENOMEM;
goto err;
}
@@ -1270,14 +1247,13 @@ static int32_t
up_readdir(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
off_t off, dict_t *xdata)
{
- int32_t op_errno = -1;
+ int32_t op_errno = ENOMEM;
upcall_local_t *local = NULL;
EXIT_IF_UPCALL_OFF(this, out);
local = upcall_local_init(frame, this, NULL, NULL, fd->inode, NULL);
if (!local) {
- op_errno = ENOMEM;
goto err;
}
@@ -1334,14 +1310,13 @@ static int32_t
up_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
off_t off, dict_t *dict)
{
- int32_t op_errno = -1;
+ int32_t op_errno = ENOMEM;
upcall_local_t *local = NULL;
EXIT_IF_UPCALL_OFF(this, out);
local = upcall_local_init(frame, this, NULL, NULL, fd->inode, NULL);
if (!local) {
- op_errno = ENOMEM;
goto err;
}
@@ -1361,14 +1336,13 @@ static int32_t
up_fsetattr(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iatt *stbuf,
int32_t valid, dict_t *xdata)
{
- int32_t op_errno = -1;
+ int32_t op_errno = ENOMEM;
upcall_local_t *local = NULL;
EXIT_IF_UPCALL_OFF(this, out);
local = upcall_local_init(frame, this, NULL, NULL, fd->inode, NULL);
if (!local) {
- op_errno = ENOMEM;
goto err;
}
@@ -1415,14 +1389,13 @@ static int32_t
up_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t mode,
off_t offset, size_t len, dict_t *xdata)
{
- int32_t op_errno = -1;
+ int32_t op_errno = ENOMEM;
upcall_local_t *local = NULL;
EXIT_IF_UPCALL_OFF(this, out);
local = upcall_local_init(frame, this, NULL, NULL, fd->inode, NULL);
if (!local) {
- op_errno = ENOMEM;
goto err;
}
@@ -1470,14 +1443,13 @@ static int32_t
up_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
size_t len, dict_t *xdata)
{
- int32_t op_errno = -1;
+ int32_t op_errno = ENOMEM;
upcall_local_t *local = NULL;
EXIT_IF_UPCALL_OFF(this, out);
local = upcall_local_init(frame, this, NULL, NULL, fd->inode, NULL);
if (!local) {
- op_errno = ENOMEM;
goto err;
}
@@ -1524,14 +1496,13 @@ static int
up_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
off_t len, dict_t *xdata)
{
- int32_t op_errno = -1;
+ int32_t op_errno = ENOMEM;
upcall_local_t *local = NULL;
EXIT_IF_UPCALL_OFF(this, out);
local = upcall_local_init(frame, this, NULL, NULL, fd->inode, NULL);
if (!local) {
- op_errno = ENOMEM;
goto err;
}
@@ -1577,14 +1548,13 @@ static int32_t
up_seek(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
gf_seek_what_t what, dict_t *xdata)
{
- int32_t op_errno = -1;
+ int32_t op_errno = ENOMEM;
upcall_local_t *local = NULL;
EXIT_IF_UPCALL_OFF(this, out);
local = upcall_local_init(frame, this, NULL, NULL, fd->inode, NULL);
if (!local) {
- op_errno = ENOMEM;
goto err;
}
@@ -1652,14 +1622,13 @@ static int32_t
up_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict,
int32_t flags, dict_t *xdata)
{
- int32_t op_errno = -1;
+ int32_t op_errno = ENOMEM;
upcall_local_t *local = NULL;
EXIT_IF_UPCALL_OFF(this, out);
local = upcall_local_init(frame, this, loc, NULL, loc->inode, dict);
if (!local) {
- op_errno = ENOMEM;
goto err;
}
@@ -1727,14 +1696,13 @@ static int32_t
up_fsetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict,
int32_t flags, dict_t *xdata)
{
- int32_t op_errno = -1;
+ int32_t op_errno = ENOMEM;
upcall_local_t *local = NULL;
EXIT_IF_UPCALL_OFF(this, out);
local = upcall_local_init(frame, this, NULL, fd, fd->inode, dict);
if (!local) {
- op_errno = ENOMEM;
goto err;
}
@@ -1800,7 +1768,7 @@ static int32_t
up_fremovexattr(call_frame_t *frame, xlator_t *this, fd_t *fd, const char *name,
dict_t *xdata)
{
- int32_t op_errno = -1;
+ int32_t op_errno = ENOMEM;
upcall_local_t *local = NULL;
dict_t *xattr = NULL;
@@ -1808,13 +1776,11 @@ up_fremovexattr(call_frame_t *frame, xlator_t *this, fd_t *fd, const char *name,
xattr = dict_for_key_value(name, "", 1, _gf_true);
if (!xattr) {
- op_errno = ENOMEM;
goto err;
}
local = upcall_local_init(frame, this, NULL, fd, fd->inode, xattr);
if (!local) {
- op_errno = ENOMEM;
goto err;
}
@@ -1885,7 +1851,7 @@ static int32_t
up_removexattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
const char *name, dict_t *xdata)
{
- int32_t op_errno = -1;
+ int32_t op_errno = ENOMEM;
upcall_local_t *local = NULL;
dict_t *xattr = NULL;
@@ -1893,13 +1859,11 @@ up_removexattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
xattr = dict_for_key_value(name, "", 1, _gf_true);
if (!xattr) {
- op_errno = ENOMEM;
goto err;
}
local = upcall_local_init(frame, this, loc, NULL, loc->inode, xattr);
if (!local) {
- op_errno = ENOMEM;
goto err;
}
@@ -1950,14 +1914,13 @@ static int32_t
up_fgetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, const char *name,
dict_t *xdata)
{
- int32_t op_errno = -1;
+ int32_t op_errno = ENOMEM;
upcall_local_t *local = NULL;
EXIT_IF_UPCALL_OFF(this, out);
local = upcall_local_init(frame, this, NULL, NULL, fd->inode, NULL);
if (!local) {
- op_errno = ENOMEM;
goto err;
}
@@ -2000,14 +1963,13 @@ static int32_t
up_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, const char *name,
dict_t *xdata)
{
- int32_t op_errno = -1;
+ int32_t op_errno = ENOMEM;
upcall_local_t *local = NULL;
EXIT_IF_UPCALL_OFF(this, out);
local = upcall_local_init(frame, this, NULL, NULL, loc->inode, NULL);
if (!local) {
- op_errno = ENOMEM;
goto err;
}
@@ -2335,14 +2297,14 @@ out:
return ret;
}
-int
+void
fini(xlator_t *this)
{
upcall_private_t *priv = NULL;
priv = this->private;
if (!priv) {
- return 0;
+ return;
}
this->private = NULL;
@@ -2367,7 +2329,7 @@ fini(xlator_t *this)
this->local_pool = NULL;
}
- return 0;
+ return;
}
int
@@ -2527,3 +2489,17 @@ struct volume_options options[] = {
.tags = {"cache", "cachetimeout", "upcall"}},
{.key = {NULL}},
};
+
+xlator_api_t xlator_api = {
+ .init = init,
+ .fini = fini,
+ .notify = notify,
+ .reconfigure = reconfigure,
+ .mem_acct_init = mem_acct_init,
+ .op_version = {1}, /* Present from the initial version */
+ .fops = &fops,
+ .cbks = &cbks,
+ .options = options,
+ .identifier = "upcall",
+ .category = GF_MAINTAINED,
+};
diff --git a/xlators/features/upcall/src/upcall.h b/xlators/features/upcall/src/upcall.h
index 3797e62aac3..aa535088ad7 100644
--- a/xlators/features/upcall/src/upcall.h
+++ b/xlators/features/upcall/src/upcall.h
@@ -10,12 +10,12 @@
#ifndef __UPCALL_H__
#define __UPCALL_H__
-#include "compat-errno.h"
+#include <glusterfs/compat-errno.h>
#include "upcall-mem-types.h"
-#include "client_t.h"
+#include <glusterfs/client_t.h>
#include "upcall-messages.h"
#include "upcall-cache-invalidation.h"
-#include "upcall-utils.h"
+#include <glusterfs/upcall-utils.h>
#define EXIT_IF_UPCALL_OFF(this, label) \
do { \
@@ -100,32 +100,10 @@ upcall_local_t *
upcall_local_init(call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd,
inode_t *inode, dict_t *xattr);
-upcall_client_t *
-add_upcall_client(call_frame_t *frame, client_t *client,
- upcall_inode_ctx_t *up_inode_ctx);
-upcall_client_t *
-__add_upcall_client(call_frame_t *frame, client_t *client,
- upcall_inode_ctx_t *up_inode_ctx);
-upcall_client_t *
-__get_upcall_client(call_frame_t *frame, client_t *client,
- upcall_inode_ctx_t *up_inode_ctx);
-int
-__upcall_cleanup_client_entry(upcall_client_t *up_client);
-int
-upcall_cleanup_expired_clients(xlator_t *this,
- upcall_inode_ctx_t *up_inode_ctx);
-
-int
-__upcall_inode_ctx_set(inode_t *inode, xlator_t *this);
-upcall_inode_ctx_t *
-__upcall_inode_ctx_get(inode_t *inode, xlator_t *this);
upcall_inode_ctx_t *
upcall_inode_ctx_get(inode_t *inode, xlator_t *this);
int
upcall_cleanup_inode_ctx(xlator_t *this, inode_t *inode);
-void
-upcall_cache_forget(xlator_t *this, inode_t *inode,
- upcall_inode_ctx_t *up_inode_ctx);
void *
upcall_reaper_thread(void *data);
@@ -142,12 +120,6 @@ upcall_cache_invalidate(call_frame_t *frame, xlator_t *this, client_t *client,
inode_t *inode, uint32_t flags, struct iatt *stbuf,
struct iatt *p_stbuf, struct iatt *oldp_stbuf,
dict_t *xattr);
-void
-upcall_client_cache_invalidate(xlator_t *xl, uuid_t gfid,
- upcall_client_t *up_client_entry, uint32_t flags,
- struct iatt *stbuf, struct iatt *p_stbuf,
- struct iatt *oldp_stbuf, dict_t *xattr);
-
int
up_filter_xattr(dict_t *xattr, dict_t *regd_xattrs);
diff --git a/xlators/features/utime/src/utime-autogen-fops-tmpl.c b/xlators/features/utime/src/utime-autogen-fops-tmpl.c
index b4be66eebd1..f2f35322926 100644
--- a/xlators/features/utime/src/utime-autogen-fops-tmpl.c
+++ b/xlators/features/utime/src/utime-autogen-fops-tmpl.c
@@ -18,11 +18,11 @@
#include "config.h"
#endif
-#include "glusterfs.h"
-#include "xlator.h"
-#include "logging.h"
-#include "statedump.h"
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/xlator.h>
+#include <glusterfs/logging.h>
+#include <glusterfs/statedump.h>
#include "utime-helpers.h"
-#include "timespec.h"
+#include <glusterfs/timespec.h>
#pragma generate
diff --git a/xlators/features/utime/src/utime-autogen-fops-tmpl.h b/xlators/features/utime/src/utime-autogen-fops-tmpl.h
index e2e807cdf64..4e102ffed6c 100644
--- a/xlators/features/utime/src/utime-autogen-fops-tmpl.h
+++ b/xlators/features/utime/src/utime-autogen-fops-tmpl.h
@@ -15,7 +15,7 @@
#ifndef _UTIME_AUTOGEN_FOPS_H
#define _UTIME_AUTOGEN_FOPS_H
-#include "xlator.h"
+#include <glusterfs/xlator.h>
#pragma generate
diff --git a/xlators/features/utime/src/utime-gen-fops-c.py b/xlators/features/utime/src/utime-gen-fops-c.py
index ab56dc9a4b3..9fb3e1b8b1a 100755
--- a/xlators/features/utime/src/utime-gen-fops-c.py
+++ b/xlators/features/utime/src/utime-gen-fops-c.py
@@ -62,6 +62,20 @@ gf_utime_@NAME@ (call_frame_t *frame, xlator_t *this,
}
"""
+FOPS_COPY_FILE_RANGE_TEMPLATE = """
+int32_t
+gf_utime_@NAME@ (call_frame_t *frame, xlator_t *this,
+ @LONG_ARGS@)
+{
+ gl_timespec_get(&frame->root->ctime);
+
+ (void) utime_update_attribute_flags(frame, this, GF_FOP_COPY_FILE_RANGE);
+ STACK_WIND (frame, gf_utime_@NAME@_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->@NAME@, @SHORT_ARGS@);
+ return 0;
+}
+"""
+
FOPS_SETATTR_TEMPLATE = """
int32_t
gf_utime_@NAME@ (call_frame_t *frame, xlator_t *this,
@@ -81,6 +95,16 @@ gf_utime_@NAME@ (call_frame_t *frame, xlator_t *this,
frame->root->flags |= MDATA_CTIME;
}
+ if (valid & (GF_SET_ATTR_ATIME | GF_SET_ATTR_MTIME)) {
+ if (valid & GF_ATTR_ATIME_NOW) {
+ frame->root->ctime.tv_sec = stbuf->ia_atime;
+ frame->root->ctime.tv_nsec = stbuf->ia_atime_nsec;
+ } else if (valid & GF_ATTR_MTIME_NOW) {
+ frame->root->ctime.tv_sec = stbuf->ia_mtime;
+ frame->root->ctime.tv_nsec = stbuf->ia_mtime_nsec;
+ }
+ }
+
STACK_WIND (frame, gf_utime_@NAME@_cbk, FIRST_CHILD(this),
FIRST_CHILD(this)->fops->@NAME@, @SHORT_ARGS@);
return 0;
@@ -94,6 +118,7 @@ utime_ops = ['fallocate', 'zerofill', 'opendir', 'mknod', 'mkdir',
utime_read_op = ['readv']
utime_write_op = ['writev']
utime_setattr_ops = ['setattr', 'fsetattr']
+utime_copy_file_range_ops = ['copy_file_range']
def gen_defaults():
for name in ops:
@@ -109,6 +134,9 @@ def gen_defaults():
if name in utime_setattr_ops:
print(generate(FOPS_CBK_COMMON_TEMPLATE, name, cbk_subs))
print(generate(FOPS_SETATTR_TEMPLATE, name, fop_subs))
+ if name in utime_copy_file_range_ops:
+ print(generate(FOPS_CBK_COMMON_TEMPLATE, name, cbk_subs))
+ print(generate(FOPS_COPY_FILE_RANGE_TEMPLATE, name, fop_subs))
for l in open(sys.argv[1], 'r').readlines():
if l.find('#pragma generate') != -1:
diff --git a/xlators/features/utime/src/utime-gen-fops-h.py b/xlators/features/utime/src/utime-gen-fops-h.py
index 3686f2e3c1e..e96274c229a 100755
--- a/xlators/features/utime/src/utime-gen-fops-h.py
+++ b/xlators/features/utime/src/utime-gen-fops-h.py
@@ -18,7 +18,7 @@ gf_utime_@NAME@ (call_frame_t *frame, xlator_t *this,
utime_ops = ['fallocate', 'zerofill', 'opendir', 'mknod', 'mkdir',
'unlink', 'rmdir', 'symlink', 'rename', 'link', 'truncate',
'ftruncate', 'create', 'open', 'removexattr', 'fremovexattr',
- 'readv', 'writev', 'setattr', 'fsetattr']
+ 'readv', 'writev', 'setattr', 'fsetattr', 'copy_file_range']
def gen_defaults():
for name, value in ops.items():
diff --git a/xlators/features/utime/src/utime-helpers.c b/xlators/features/utime/src/utime-helpers.c
index c79e12badfa..29d9ad93561 100644
--- a/xlators/features/utime/src/utime-helpers.c
+++ b/xlators/features/utime/src/utime-helpers.c
@@ -17,7 +17,7 @@ gl_timespec_get(struct timespec *ts)
#ifdef TIME_UTC
timespec_get(ts, TIME_UTC);
#else
- timespec_now(ts);
+ timespec_now_realtime(ts);
#endif
}
@@ -93,6 +93,15 @@ utime_update_attribute_flags(call_frame_t *frame, xlator_t *this,
frame->root->flags |= MDATA_CTIME;
break;
+ case GF_FOP_COPY_FILE_RANGE:
+ /* Below 2 are for destination fd */
+ frame->root->flags |= MDATA_CTIME;
+ frame->root->flags |= MDATA_MTIME;
+ /* Below flag is for the source fd */
+ if (!utime_priv->noatime) {
+ frame->root->flags |= MDATA_ATIME;
+ }
+ break;
default:
frame->root->flags = 0;
}
diff --git a/xlators/features/utime/src/utime-helpers.h b/xlators/features/utime/src/utime-helpers.h
index b89867a3db3..2e32d4bece6 100644
--- a/xlators/features/utime/src/utime-helpers.h
+++ b/xlators/features/utime/src/utime-helpers.h
@@ -11,10 +11,9 @@
#ifndef _UTIME_HELPERS_H
#define _UTIME_HELPERS_H
-#include "glusterfs-fops.h"
-#include "stack.h"
-#include "xlator.h"
-#include "timespec.h"
+#include <glusterfs/stack.h>
+#include <glusterfs/xlator.h>
+#include <glusterfs/timespec.h>
#include <time.h>
void
diff --git a/xlators/features/utime/src/utime-mem-types.h b/xlators/features/utime/src/utime-mem-types.h
index fbd9aff0eca..ad1255f85f3 100644
--- a/xlators/features/utime/src/utime-mem-types.h
+++ b/xlators/features/utime/src/utime-mem-types.h
@@ -11,7 +11,7 @@
#ifndef __UTIME_MEM_TYPES_H__
#define __UTIME_MEM_TYPES_H__
-#include "mem-types.h"
+#include <glusterfs/mem-types.h>
enum gf_utime_mem_types_ {
utime_mt_utime_t = gf_common_mt_end + 1,
diff --git a/xlators/features/utime/src/utime-messages.h b/xlators/features/utime/src/utime-messages.h
index 7613c335d43..bd40265abaf 100644
--- a/xlators/features/utime/src/utime-messages.h
+++ b/xlators/features/utime/src/utime-messages.h
@@ -11,7 +11,7 @@
#ifndef __UTIME_MESSAGES_H__
#define __UTIME_MESSAGES_H__
-#include "glfs-message-id.h"
+#include <glusterfs/glfs-message-id.h>
/* To add new message IDs, append new identifiers at the end of the list.
*
@@ -23,6 +23,7 @@
* glfs-message-id.h.
*/
-GLFS_MSGID(UTIME, UTIME_MSG_NO_MEMORY);
+GLFS_MSGID(UTIME, UTIME_MSG_NO_MEMORY, UTIME_MSG_SET_MDATA_FAILED,
+ UTIME_MSG_DICT_SET_FAILED);
#endif /* __UTIME_MESSAGES_H__ */
diff --git a/xlators/features/utime/src/utime.c b/xlators/features/utime/src/utime.c
index 418e4c4a0d5..2acc63e6a05 100644
--- a/xlators/features/utime/src/utime.c
+++ b/xlators/features/utime/src/utime.c
@@ -9,8 +9,10 @@
*/
#include "utime.h"
+#include "utime-helpers.h"
#include "utime-messages.h"
#include "utime-mem-types.h"
+#include <glusterfs/call-stub.h>
int32_t
gf_utime_invalidate(xlator_t *this, inode_t *inode)
@@ -133,6 +135,141 @@ mem_acct_init(xlator_t *this)
}
int32_t
+gf_utime_set_mdata_setxattr_cbk(call_frame_t *frame, void *cookie,
+ xlator_t *this, int op_ret, int op_errno,
+ dict_t *xdata)
+{
+ call_stub_t *stub = frame->local;
+ /* Don't fail lookup if mdata setxattr fails */
+ if (op_ret) {
+ gf_msg(this->name, GF_LOG_ERROR, op_errno, UTIME_MSG_SET_MDATA_FAILED,
+ "dict set of key for set-ctime-mdata failed");
+ }
+ frame->local = NULL;
+ call_resume(stub);
+ STACK_DESTROY(frame->root);
+ return 0;
+}
+
+int32_t
+gf_utime_set_mdata_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *stbuf, dict_t *xdata,
+ struct iatt *postparent)
+{
+ dict_t *dict = NULL;
+ struct mdata_iatt *mdata = NULL;
+ int ret = 0;
+ loc_t loc = {
+ 0,
+ };
+ call_frame_t *new_frame = NULL;
+
+ if (!op_ret && dict_get(xdata, GF_XATTR_MDATA_KEY) == NULL) {
+ dict = dict_new();
+ if (!dict) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+ mdata = GF_MALLOC(sizeof(struct mdata_iatt), gf_common_mt_char);
+ if (mdata == NULL) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+ iatt_to_mdata(mdata, stbuf);
+ ret = dict_set_mdata(dict, CTIME_MDATA_XDATA_KEY, mdata, _gf_false);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_WARNING, ENOMEM, UTIME_MSG_NO_MEMORY,
+ "dict set of key for set-ctime-mdata failed");
+ goto err;
+ }
+ new_frame = copy_frame(frame);
+ if (!new_frame) {
+ op_errno = ENOMEM;
+ goto stub_err;
+ }
+
+ new_frame->local = fop_lookup_cbk_stub(frame, default_lookup_cbk,
+ op_ret, op_errno, inode, stbuf,
+ xdata, postparent);
+ if (!new_frame->local) {
+ gf_msg(this->name, GF_LOG_WARNING, ENOMEM, UTIME_MSG_NO_MEMORY,
+ "lookup_cbk stub allocation failed");
+ op_errno = ENOMEM;
+ STACK_DESTROY(new_frame->root);
+ goto stub_err;
+ }
+
+ loc.inode = inode_ref(inode);
+ gf_uuid_copy(loc.gfid, stbuf->ia_gfid);
+
+ new_frame->root->uid = 0;
+ new_frame->root->gid = 0;
+ new_frame->root->pid = GF_CLIENT_PID_SET_UTIME;
+ STACK_WIND(new_frame, gf_utime_set_mdata_setxattr_cbk,
+ FIRST_CHILD(this), FIRST_CHILD(this)->fops->setxattr, &loc,
+ dict, 0, NULL);
+
+ dict_unref(dict);
+ inode_unref(loc.inode);
+ return 0;
+ }
+
+ STACK_UNWIND_STRICT(lookup, frame, op_ret, op_errno, inode, stbuf, xdata,
+ postparent);
+ return 0;
+
+err:
+ if (mdata) {
+ GF_FREE(mdata);
+ }
+stub_err:
+ if (dict) {
+ dict_unref(dict);
+ }
+ STACK_UNWIND_STRICT(lookup, frame, -1, op_errno, NULL, NULL, NULL, NULL);
+ return 0;
+}
+
+int
+gf_utime_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
+{
+ int op_errno = EINVAL;
+ int ret = -1;
+
+ VALIDATE_OR_GOTO(frame, err);
+ VALIDATE_OR_GOTO(this, err);
+ VALIDATE_OR_GOTO(loc, err);
+ VALIDATE_OR_GOTO(loc->inode, err);
+
+ xdata = xdata ? dict_ref(xdata) : dict_new();
+ if (!xdata) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ ret = dict_set_int8(xdata, GF_XATTR_MDATA_KEY, 1);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_WARNING, -ret, UTIME_MSG_DICT_SET_FAILED,
+ "%s: Unable to set dict value for %s", loc->path,
+ GF_XATTR_MDATA_KEY);
+ op_errno = -ret;
+ goto free_dict;
+ }
+
+ STACK_WIND(frame, gf_utime_set_mdata_lookup_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->lookup, loc, xdata);
+ dict_unref(xdata);
+ return 0;
+
+free_dict:
+ dict_unref(xdata);
+err:
+ STACK_UNWIND_STRICT(lookup, frame, ret, op_errno, NULL, NULL, NULL, NULL);
+ return 0;
+}
+
+int32_t
init(xlator_t *this)
{
utime_priv_t *utime = NULL;
@@ -182,19 +319,27 @@ notify(xlator_t *this, int event, void *data, ...)
}
struct xlator_fops fops = {
- /* TODO: Need to go through other fops and
- * check if they modify time attributes
- */
- .rename = gf_utime_rename, .mknod = gf_utime_mknod,
- .readv = gf_utime_readv, .fremovexattr = gf_utime_fremovexattr,
- .open = gf_utime_open, .create = gf_utime_create,
- .mkdir = gf_utime_mkdir, .writev = gf_utime_writev,
- .rmdir = gf_utime_rmdir, .fallocate = gf_utime_fallocate,
- .truncate = gf_utime_truncate, .symlink = gf_utime_symlink,
- .zerofill = gf_utime_zerofill, .link = gf_utime_link,
- .ftruncate = gf_utime_ftruncate, .unlink = gf_utime_unlink,
- .setattr = gf_utime_setattr, .fsetattr = gf_utime_fsetattr,
- .opendir = gf_utime_opendir, .removexattr = gf_utime_removexattr,
+ .rename = gf_utime_rename,
+ .mknod = gf_utime_mknod,
+ .readv = gf_utime_readv,
+ .fremovexattr = gf_utime_fremovexattr,
+ .open = gf_utime_open,
+ .create = gf_utime_create,
+ .mkdir = gf_utime_mkdir,
+ .writev = gf_utime_writev,
+ .rmdir = gf_utime_rmdir,
+ .fallocate = gf_utime_fallocate,
+ .truncate = gf_utime_truncate,
+ .symlink = gf_utime_symlink,
+ .zerofill = gf_utime_zerofill,
+ .link = gf_utime_link,
+ .ftruncate = gf_utime_ftruncate,
+ .unlink = gf_utime_unlink,
+ .setattr = gf_utime_setattr,
+ .fsetattr = gf_utime_fsetattr,
+ .opendir = gf_utime_opendir,
+ .removexattr = gf_utime_removexattr,
+ .lookup = gf_utime_lookup,
};
struct xlator_cbks cbks = {
.invalidate = gf_utime_invalidate,
@@ -230,3 +375,18 @@ struct volume_options options[] = {
"enabled. When noatime is on, atime is not updated with "
"ctime feature enabled and vice versa."},
{.key = {NULL}}};
+
+xlator_api_t xlator_api = {
+ .init = init,
+ .fini = fini,
+ .notify = notify,
+ .reconfigure = reconfigure,
+ .mem_acct_init = mem_acct_init,
+ .op_version = {GD_OP_VERSION_5_0},
+ .dumpops = &dumpops,
+ .fops = &fops,
+ .cbks = &cbks,
+ .options = options,
+ .identifier = "utime",
+ .category = GF_MAINTAINED,
+};
diff --git a/xlators/features/utime/src/utime.h b/xlators/features/utime/src/utime.h
index 236183d4bcc..ba55eec00de 100644
--- a/xlators/features/utime/src/utime.h
+++ b/xlators/features/utime/src/utime.h
@@ -11,9 +11,9 @@
#ifndef __UTIME_H__
#define __UTIME_H__
-#include "glusterfs.h"
-#include "xlator.h"
-#include "defaults.h"
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/xlator.h>
+#include <glusterfs/defaults.h>
#include "utime-autogen-fops.h"
typedef struct utime_priv {
diff --git a/xlators/lib/src/libxlator.c b/xlators/lib/src/libxlator.c
index e8e22bb7830..8075fa0c29f 100644
--- a/xlators/lib/src/libxlator.c
+++ b/xlators/lib/src/libxlator.c
@@ -7,7 +7,7 @@
later), or the GNU General Public License, version 2 (GPLv2), in all
cases as published by the Free Software Foundation.
*/
-#include "mem-types.h"
+
#include "libxlator.h"
int marker_xtime_default_gauge[] = {
@@ -198,10 +198,11 @@ cluster_markerxtime_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
}
if (dict_get_ptr(dict, marker_xattr, (void **)&net_timebuf)) {
+ local->count[MCNT_NOTFOUND]++;
+ UNLOCK(&frame->lock);
gf_log(this->name, GF_LOG_WARNING,
"Unable to get <uuid>.xtime attr");
- local->count[MCNT_NOTFOUND]++;
- goto unlock;
+ goto post_unlock;
}
if (local->count[MCNT_FOUND]) {
@@ -221,7 +222,7 @@ cluster_markerxtime_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
}
unlock:
UNLOCK(&frame->lock);
-
+post_unlock:
if (callcnt == 0)
cluster_marker_unwind(frame, marker_xattr, local->net_timebuf, 8, dict);
@@ -266,17 +267,17 @@ cluster_markeruuid_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
goto unlock;
} else if (volmark->retval) {
GF_FREE(local->volmark);
- local->volmark = memdup(volmark, sizeof(*volmark));
+ local->volmark = gf_memdup(volmark, sizeof(*volmark));
local->retval = volmark->retval;
} else if ((volmark->sec > local->volmark->sec) ||
((volmark->sec == local->volmark->sec) &&
(volmark->usec >= local->volmark->usec))) {
GF_FREE(local->volmark);
- local->volmark = memdup(volmark, sizeof(*volmark));
+ local->volmark = gf_memdup(volmark, sizeof(*volmark));
}
} else {
- local->volmark = memdup(volmark, sizeof(*volmark));
+ local->volmark = gf_memdup(volmark, sizeof(*volmark));
VALIDATE_OR_GOTO(local->volmark, unlock);
gf_uuid_unparse(volmark->uuid, vol_uuid);
if (volmark->retval)
diff --git a/xlators/lib/src/libxlator.h b/xlators/lib/src/libxlator.h
index 80224f9df38..81da4060d55 100644
--- a/xlators/lib/src/libxlator.h
+++ b/xlators/lib/src/libxlator.h
@@ -10,12 +10,14 @@
#ifndef _LIBXLATOR_H
#define _LIBXLATOR_H
-#include "xlator.h"
-#include "logging.h"
-#include "defaults.h"
-#include "common-utils.h"
-#include "compat.h"
-#include "compat-errno.h"
+#include <glusterfs/defaults.h>
+
+#include <stdint.h> // for int32_t
+#include "glusterfs/dict.h" // for dict_t, data_t
+#include "glusterfs/globals.h" // for xlator_t, loc_t
+#include "glusterfs/stack.h" // for call_frame_t
+#include <glusterfs/compat.h>
+#include <glusterfs/compat-errno.h>
#define MARKER_XATTR_PREFIX "trusted.glusterfs"
#define XTIME "xtime"
diff --git a/xlators/meta/src/active-link.c b/xlators/meta/src/active-link.c
index 6c060455887..7ee780d89e9 100644
--- a/xlators/meta/src/active-link.c
+++ b/xlators/meta/src/active-link.c
@@ -8,8 +8,8 @@
cases as published by the Free Software Foundation.
*/
-#include "xlator.h"
-#include "defaults.h"
+#include <glusterfs/xlator.h>
+#include <glusterfs/defaults.h>
#include "meta-mem-types.h"
#include "meta.h"
diff --git a/xlators/meta/src/cmdline-file.c b/xlators/meta/src/cmdline-file.c
index 3d6a020ac6c..eb24e985af9 100644
--- a/xlators/meta/src/cmdline-file.c
+++ b/xlators/meta/src/cmdline-file.c
@@ -8,13 +8,13 @@
cases as published by the Free Software Foundation.
*/
-#include "xlator.h"
-#include "defaults.h"
+#include <glusterfs/xlator.h>
+#include <glusterfs/defaults.h>
#include "meta-mem-types.h"
#include "meta.h"
-#include "strfd.h"
-#include "lkowner.h"
+#include <glusterfs/strfd.h>
+#include <glusterfs/lkowner.h>
static int
cmdline_file_fill(xlator_t *this, inode_t *file, strfd_t *strfd)
diff --git a/xlators/meta/src/frames-file.c b/xlators/meta/src/frames-file.c
index 1316aa71ce8..9a13db9a934 100644
--- a/xlators/meta/src/frames-file.c
+++ b/xlators/meta/src/frames-file.c
@@ -8,13 +8,13 @@
cases as published by the Free Software Foundation.
*/
-#include "xlator.h"
-#include "defaults.h"
+#include <glusterfs/xlator.h>
+#include <glusterfs/defaults.h>
#include "meta-mem-types.h"
#include "meta.h"
-#include "strfd.h"
-#include "lkowner.h"
+#include <glusterfs/strfd.h>
+#include <glusterfs/lkowner.h>
static int
frames_file_fill(xlator_t *this, inode_t *file, strfd_t *strfd)
@@ -30,9 +30,10 @@ frames_file_fill(xlator_t *this, inode_t *file, strfd_t *strfd)
pool = this->ctx->pool;
+ strprintf(strfd, "{ \n\t\"Stack\": [\n");
+
LOCK(&pool->lock);
{
- strprintf(strfd, "{ \n\t\"Stack\": [\n");
list_for_each_entry(stack, &pool->all_frames, all_frames)
{
strprintf(strfd, "\t {\n");
diff --git a/xlators/meta/src/graph-dir.c b/xlators/meta/src/graph-dir.c
index 606fea904be..a8f4787880d 100644
--- a/xlators/meta/src/graph-dir.c
+++ b/xlators/meta/src/graph-dir.c
@@ -8,8 +8,8 @@
cases as published by the Free Software Foundation.
*/
-#include "xlator.h"
-#include "defaults.h"
+#include <glusterfs/xlator.h>
+#include <glusterfs/defaults.h>
#include "meta-mem-types.h"
#include "meta.h"
diff --git a/xlators/meta/src/graphs-dir.c b/xlators/meta/src/graphs-dir.c
index 7c8f4276c03..a1ffbca7d5a 100644
--- a/xlators/meta/src/graphs-dir.c
+++ b/xlators/meta/src/graphs-dir.c
@@ -8,8 +8,8 @@
cases as published by the Free Software Foundation.
*/
-#include "xlator.h"
-#include "defaults.h"
+#include <glusterfs/xlator.h>
+#include <glusterfs/defaults.h>
#include "meta-mem-types.h"
#include "meta.h"
diff --git a/xlators/meta/src/history-file.c b/xlators/meta/src/history-file.c
index a78fe27a54a..7742a635fed 100644
--- a/xlators/meta/src/history-file.c
+++ b/xlators/meta/src/history-file.c
@@ -8,13 +8,13 @@
cases as published by the Free Software Foundation.
*/
-#include "xlator.h"
-#include "defaults.h"
+#include <glusterfs/xlator.h>
+#include <glusterfs/defaults.h>
#include "meta-mem-types.h"
#include "meta.h"
-#include "strfd.h"
-#include "statedump.h"
+#include <glusterfs/strfd.h>
+#include <glusterfs/statedump.h>
static int
history_file_fill(xlator_t *this, inode_t *file, strfd_t *strfd)
diff --git a/xlators/meta/src/logfile-link.c b/xlators/meta/src/logfile-link.c
index 61cd4fee852..616a54518c0 100644
--- a/xlators/meta/src/logfile-link.c
+++ b/xlators/meta/src/logfile-link.c
@@ -8,8 +8,8 @@
cases as published by the Free Software Foundation.
*/
-#include "xlator.h"
-#include "defaults.h"
+#include <glusterfs/xlator.h>
+#include <glusterfs/defaults.h>
#include "meta-mem-types.h"
#include "meta.h"
diff --git a/xlators/meta/src/logging-dir.c b/xlators/meta/src/logging-dir.c
index 590b5532b38..46e6f9e95dd 100644
--- a/xlators/meta/src/logging-dir.c
+++ b/xlators/meta/src/logging-dir.c
@@ -8,8 +8,8 @@
cases as published by the Free Software Foundation.
*/
-#include "xlator.h"
-#include "defaults.h"
+#include <glusterfs/xlator.h>
+#include <glusterfs/defaults.h>
#include "meta-mem-types.h"
#include "meta.h"
diff --git a/xlators/meta/src/loglevel-file.c b/xlators/meta/src/loglevel-file.c
index 3f3f2707dc0..eeeeeaa5907 100644
--- a/xlators/meta/src/loglevel-file.c
+++ b/xlators/meta/src/loglevel-file.c
@@ -8,12 +8,12 @@
cases as published by the Free Software Foundation.
*/
-#include "xlator.h"
-#include "defaults.h"
+#include <glusterfs/xlator.h>
+#include <glusterfs/defaults.h>
#include "meta-mem-types.h"
#include "meta.h"
-#include "strfd.h"
+#include <glusterfs/strfd.h>
static int
loglevel_file_fill(xlator_t *this, inode_t *file, strfd_t *strfd)
diff --git a/xlators/meta/src/mallinfo-file.c b/xlators/meta/src/mallinfo-file.c
index d04fe9f4c1e..b4396d72189 100644
--- a/xlators/meta/src/mallinfo-file.c
+++ b/xlators/meta/src/mallinfo-file.c
@@ -8,12 +8,12 @@
cases as published by the Free Software Foundation.
*/
-#include "xlator.h"
-#include "defaults.h"
+#include <glusterfs/xlator.h>
+#include <glusterfs/defaults.h>
#include "meta-mem-types.h"
#include "meta.h"
-#include "statedump.h"
+#include <glusterfs/statedump.h>
static int
mallinfo_file_fill(xlator_t *this, inode_t *file, strfd_t *strfd)
diff --git a/xlators/meta/src/measure-file.c b/xlators/meta/src/measure-file.c
index 1529608bb25..52e92e48590 100644
--- a/xlators/meta/src/measure-file.c
+++ b/xlators/meta/src/measure-file.c
@@ -8,12 +8,12 @@
cases as published by the Free Software Foundation.
*/
-#include "xlator.h"
-#include "defaults.h"
+#include <glusterfs/xlator.h>
+#include <glusterfs/defaults.h>
#include "meta-mem-types.h"
#include "meta.h"
-#include "strfd.h"
+#include <glusterfs/strfd.h>
static int
measure_file_fill(xlator_t *this, inode_t *file, strfd_t *strfd)
diff --git a/xlators/meta/src/meminfo-file.c b/xlators/meta/src/meminfo-file.c
index d6122ae5013..d889dfb2ae8 100644
--- a/xlators/meta/src/meminfo-file.c
+++ b/xlators/meta/src/meminfo-file.c
@@ -8,13 +8,13 @@
cases as published by the Free Software Foundation.
*/
-#include "xlator.h"
-#include "defaults.h"
+#include <glusterfs/xlator.h>
+#include <glusterfs/defaults.h>
#include "meta-mem-types.h"
#include "meta.h"
-#include "strfd.h"
-#include "statedump.h"
+#include <glusterfs/strfd.h>
+#include <glusterfs/statedump.h>
static int
meminfo_file_fill(xlator_t *this, inode_t *file, strfd_t *strfd)
diff --git a/xlators/meta/src/meta-defaults.c b/xlators/meta/src/meta-defaults.c
index b39e9f21ff7..91c328473f8 100644
--- a/xlators/meta/src/meta-defaults.c
+++ b/xlators/meta/src/meta-defaults.c
@@ -8,13 +8,13 @@
cases as published by the Free Software Foundation.
*/
-#include "xlator.h"
-#include "defaults.h"
+#include <glusterfs/xlator.h>
+#include <glusterfs/defaults.h>
#include "meta-mem-types.h"
#include "meta.h"
-#include "compat-errno.h"
+#include <glusterfs/compat-errno.h>
int
meta_default_fgetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd,
@@ -145,11 +145,11 @@ meta_default_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
return default_readv_failure_cbk(frame, ENOMEM);
}
+ iov.iov_base = iobuf_ptr(iobuf);
+
/* iobref would have taken a ref */
iobuf_unref(iobuf);
- iov.iov_base = iobuf_ptr(iobuf);
-
copy_offset = min(meta_fd->size, offset);
copy_size = min(size, (meta_fd->size - copy_offset));
@@ -244,7 +244,7 @@ meta_default_readlink(call_frame_t *frame, xlator_t *this, loc_t *loc,
int len = -1;
ops = meta_ops_get(loc->inode, this);
- if (!ops->link_fill) {
+ if (!ops || !ops->link_fill) {
META_STACK_UNWIND(readlink, frame, -1, EPERM, 0, 0, 0);
return 0;
}
diff --git a/xlators/meta/src/meta-helpers.c b/xlators/meta/src/meta-helpers.c
index d7d59c71296..cb54f547468 100644
--- a/xlators/meta/src/meta-helpers.c
+++ b/xlators/meta/src/meta-helpers.c
@@ -8,8 +8,8 @@
cases as published by the Free Software Foundation.
*/
-#include "xlator.h"
-#include "defaults.h"
+#include <glusterfs/xlator.h>
+#include <glusterfs/defaults.h>
#include "meta-mem-types.h"
#include "meta.h"
@@ -182,14 +182,15 @@ meta_uuid_copy(uuid_t dst, uuid_t src)
}
static void
-default_meta_iatt_fill(struct iatt *iatt, inode_t *inode, ia_type_t type)
+default_meta_iatt_fill(struct iatt *iatt, inode_t *inode, ia_type_t type,
+ gf_boolean_t is_tunable)
{
struct timeval tv = {};
iatt->ia_type = type;
switch (type) {
case IA_IFDIR:
- iatt->ia_prot = ia_prot_from_st_mode(0755);
+ iatt->ia_prot = ia_prot_from_st_mode(0555);
iatt->ia_nlink = 2;
break;
case IA_IFLNK:
@@ -197,7 +198,7 @@ default_meta_iatt_fill(struct iatt *iatt, inode_t *inode, ia_type_t type)
iatt->ia_nlink = 1;
break;
default:
- iatt->ia_prot = ia_prot_from_st_mode(0644);
+ iatt->ia_prot = ia_prot_from_st_mode(is_tunable ? 0644 : 0444);
iatt->ia_nlink = 1;
break;
}
@@ -225,7 +226,7 @@ meta_iatt_fill(struct iatt *iatt, inode_t *inode, ia_type_t type)
return;
if (!ops->iatt_fill)
- default_meta_iatt_fill(iatt, inode, type);
+ default_meta_iatt_fill(iatt, inode, type, !!ops->file_write);
else
ops->iatt_fill(THIS, inode, iatt);
return;
diff --git a/xlators/meta/src/meta-hooks.h b/xlators/meta/src/meta-hooks.h
index 2ee006f7876..7208641398a 100644
--- a/xlators/meta/src/meta-hooks.h
+++ b/xlators/meta/src/meta-hooks.h
@@ -10,7 +10,7 @@
#ifndef __META_HOOKS_H
#define __META_HOOKS_H
-#include "xlator.h"
+#include <glusterfs/xlator.h>
#define DECLARE_HOOK(name) \
int meta_##name##_hook(call_frame_t *frame, xlator_t *this, loc_t *loc, \
diff --git a/xlators/meta/src/meta-mem-types.h b/xlators/meta/src/meta-mem-types.h
index 0fdfea4fac6..033c306682f 100644
--- a/xlators/meta/src/meta-mem-types.h
+++ b/xlators/meta/src/meta-mem-types.h
@@ -11,7 +11,7 @@
#ifndef __META_MEM_TYPES_H__
#define __META_MEM_TYPES_H__
-#include "mem-types.h"
+#include <glusterfs/mem-types.h>
enum gf_meta_mem_types_ {
gf_meta_mt_priv_t = gf_common_mt_end + 1,
diff --git a/xlators/meta/src/meta.c b/xlators/meta/src/meta.c
index 9294bbdae0c..e1b9a2b6581 100644
--- a/xlators/meta/src/meta.c
+++ b/xlators/meta/src/meta.c
@@ -8,8 +8,8 @@
cases as published by the Free Software Foundation.
*/
-#include "xlator.h"
-#include "defaults.h"
+#include <glusterfs/xlator.h>
+#include <glusterfs/defaults.h>
#include "meta-mem-types.h"
#include "meta.h"
@@ -226,11 +226,11 @@ out:
return ret;
}
-int
+void
fini(xlator_t *this)
{
GF_FREE(this->private);
- return 0;
+ return;
}
struct xlator_fops fops = {.lookup = meta_lookup,
@@ -262,3 +262,15 @@ struct volume_options options[] = {
.description = "Name of default meta directory."},
{.key = {NULL}},
};
+
+xlator_api_t xlator_api = {
+ .init = init,
+ .fini = fini,
+ .mem_acct_init = mem_acct_init,
+ .op_version = {1}, /* Present from the initial version */
+ .fops = &fops,
+ .cbks = &cbks,
+ .options = options,
+ .identifier = "meta",
+ .category = GF_TECH_PREVIEW,
+};
diff --git a/xlators/meta/src/meta.h b/xlators/meta/src/meta.h
index 10609a9ec05..7f0cf28808a 100644
--- a/xlators/meta/src/meta.h
+++ b/xlators/meta/src/meta.h
@@ -10,7 +10,7 @@
#ifndef __META_H__
#define __META_H__
-#include "strfd.h"
+#include <glusterfs/strfd.h>
#define DEFAULT_META_DIR_NAME ".meta"
diff --git a/xlators/meta/src/name-file.c b/xlators/meta/src/name-file.c
index edba7142a69..5874a24d78a 100644
--- a/xlators/meta/src/name-file.c
+++ b/xlators/meta/src/name-file.c
@@ -8,13 +8,13 @@
cases as published by the Free Software Foundation.
*/
-#include "xlator.h"
-#include "defaults.h"
+#include <glusterfs/xlator.h>
+#include <glusterfs/defaults.h>
#include "meta-mem-types.h"
#include "meta.h"
-#include "strfd.h"
-#include "lkowner.h"
+#include <glusterfs/strfd.h>
+#include <glusterfs/lkowner.h>
static int
name_file_fill(xlator_t *this, inode_t *file, strfd_t *strfd)
diff --git a/xlators/meta/src/option-file.c b/xlators/meta/src/option-file.c
index 132b28fa86a..ff55eca592f 100644
--- a/xlators/meta/src/option-file.c
+++ b/xlators/meta/src/option-file.c
@@ -8,8 +8,8 @@
cases as published by the Free Software Foundation.
*/
-#include "xlator.h"
-#include "defaults.h"
+#include <glusterfs/xlator.h>
+#include <glusterfs/defaults.h>
#include "meta-mem-types.h"
#include "meta.h"
diff --git a/xlators/meta/src/options-dir.c b/xlators/meta/src/options-dir.c
index bf2d07a3701..d68a7eeaffc 100644
--- a/xlators/meta/src/options-dir.c
+++ b/xlators/meta/src/options-dir.c
@@ -8,8 +8,8 @@
cases as published by the Free Software Foundation.
*/
-#include "xlator.h"
-#include "defaults.h"
+#include <glusterfs/xlator.h>
+#include <glusterfs/defaults.h>
#include "meta-mem-types.h"
#include "meta.h"
diff --git a/xlators/meta/src/private-file.c b/xlators/meta/src/private-file.c
index 068c8ebf191..23ec319456b 100644
--- a/xlators/meta/src/private-file.c
+++ b/xlators/meta/src/private-file.c
@@ -8,13 +8,13 @@
cases as published by the Free Software Foundation.
*/
-#include "xlator.h"
-#include "defaults.h"
+#include <glusterfs/xlator.h>
+#include <glusterfs/defaults.h>
#include "meta-mem-types.h"
#include "meta.h"
-#include "strfd.h"
-#include "statedump.h"
+#include <glusterfs/strfd.h>
+#include <glusterfs/statedump.h>
static int
private_file_fill(xlator_t *this, inode_t *file, strfd_t *strfd)
diff --git a/xlators/meta/src/process_uuid-file.c b/xlators/meta/src/process_uuid-file.c
index bbf2e19f86e..a24c1b57ab3 100644
--- a/xlators/meta/src/process_uuid-file.c
+++ b/xlators/meta/src/process_uuid-file.c
@@ -8,13 +8,13 @@
cases as published by the Free Software Foundation.
*/
-#include "xlator.h"
-#include "defaults.h"
+#include <glusterfs/xlator.h>
+#include <glusterfs/defaults.h>
#include "meta-mem-types.h"
#include "meta.h"
-#include "strfd.h"
-#include "lkowner.h"
+#include <glusterfs/strfd.h>
+#include <glusterfs/lkowner.h>
static int
process_uuid_file_fill(xlator_t *this, inode_t *file, strfd_t *strfd)
diff --git a/xlators/meta/src/profile-file.c b/xlators/meta/src/profile-file.c
index 9eb5c050dbe..829dcb77451 100644
--- a/xlators/meta/src/profile-file.c
+++ b/xlators/meta/src/profile-file.c
@@ -8,13 +8,13 @@
cases as published by the Free Software Foundation.
*/
-#include "xlator.h"
-#include "defaults.h"
+#include <glusterfs/xlator.h>
+#include <glusterfs/defaults.h>
#include "meta-mem-types.h"
#include "meta.h"
-#include "strfd.h"
-#include "statedump.h"
+#include <glusterfs/strfd.h>
+#include <glusterfs/statedump.h>
static int
profile_file_fill(xlator_t *this, inode_t *file, strfd_t *strfd)
diff --git a/xlators/meta/src/root-dir.c b/xlators/meta/src/root-dir.c
index 18d4fd6b8ec..80292bd3dda 100644
--- a/xlators/meta/src/root-dir.c
+++ b/xlators/meta/src/root-dir.c
@@ -8,8 +8,8 @@
cases as published by the Free Software Foundation.
*/
-#include "xlator.h"
-#include "defaults.h"
+#include <glusterfs/xlator.h>
+#include <glusterfs/defaults.h>
#include "meta-mem-types.h"
#include "meta.h"
diff --git a/xlators/meta/src/subvolume-link.c b/xlators/meta/src/subvolume-link.c
index 45d909daca2..5b1f752efd0 100644
--- a/xlators/meta/src/subvolume-link.c
+++ b/xlators/meta/src/subvolume-link.c
@@ -8,8 +8,8 @@
cases as published by the Free Software Foundation.
*/
-#include "xlator.h"
-#include "defaults.h"
+#include <glusterfs/xlator.h>
+#include <glusterfs/defaults.h>
#include "meta-mem-types.h"
#include "meta.h"
diff --git a/xlators/meta/src/subvolumes-dir.c b/xlators/meta/src/subvolumes-dir.c
index e9582c53cf7..3cb170ea1f4 100644
--- a/xlators/meta/src/subvolumes-dir.c
+++ b/xlators/meta/src/subvolumes-dir.c
@@ -8,8 +8,8 @@
cases as published by the Free Software Foundation.
*/
-#include "xlator.h"
-#include "defaults.h"
+#include <glusterfs/xlator.h>
+#include <glusterfs/defaults.h>
#include "meta-mem-types.h"
#include "meta.h"
diff --git a/xlators/meta/src/top-link.c b/xlators/meta/src/top-link.c
index bfee742a1be..33f0d407411 100644
--- a/xlators/meta/src/top-link.c
+++ b/xlators/meta/src/top-link.c
@@ -8,8 +8,8 @@
cases as published by the Free Software Foundation.
*/
-#include "xlator.h"
-#include "defaults.h"
+#include <glusterfs/xlator.h>
+#include <glusterfs/defaults.h>
#include "meta-mem-types.h"
#include "meta.h"
diff --git a/xlators/meta/src/type-file.c b/xlators/meta/src/type-file.c
index 00141275957..ece342a0b2a 100644
--- a/xlators/meta/src/type-file.c
+++ b/xlators/meta/src/type-file.c
@@ -8,13 +8,13 @@
cases as published by the Free Software Foundation.
*/
-#include "xlator.h"
-#include "defaults.h"
+#include <glusterfs/xlator.h>
+#include <glusterfs/defaults.h>
#include "meta-mem-types.h"
#include "meta.h"
-#include "strfd.h"
-#include "lkowner.h"
+#include <glusterfs/strfd.h>
+#include <glusterfs/lkowner.h>
static int
type_file_fill(xlator_t *this, inode_t *file, strfd_t *strfd)
diff --git a/xlators/meta/src/version-file.c b/xlators/meta/src/version-file.c
index 7331684238c..36276fb810a 100644
--- a/xlators/meta/src/version-file.c
+++ b/xlators/meta/src/version-file.c
@@ -8,13 +8,13 @@
cases as published by the Free Software Foundation.
*/
-#include "xlator.h"
-#include "defaults.h"
+#include <glusterfs/xlator.h>
+#include <glusterfs/defaults.h>
#include "meta-mem-types.h"
#include "meta.h"
-#include "strfd.h"
-#include "lkowner.h"
+#include <glusterfs/strfd.h>
+#include <glusterfs/lkowner.h>
static int
version_file_fill(xlator_t *this, inode_t *file, strfd_t *strfd)
diff --git a/xlators/meta/src/view-dir.c b/xlators/meta/src/view-dir.c
index 313bf6d7124..30931061567 100644
--- a/xlators/meta/src/view-dir.c
+++ b/xlators/meta/src/view-dir.c
@@ -8,8 +8,8 @@
cases as published by the Free Software Foundation.
*/
-#include "xlator.h"
-#include "defaults.h"
+#include <glusterfs/xlator.h>
+#include <glusterfs/defaults.h>
#include "meta-mem-types.h"
#include "meta.h"
diff --git a/xlators/meta/src/volfile-file.c b/xlators/meta/src/volfile-file.c
index 5242a14bb8c..b2e2562ab8b 100644
--- a/xlators/meta/src/volfile-file.c
+++ b/xlators/meta/src/volfile-file.c
@@ -8,12 +8,12 @@
cases as published by the Free Software Foundation.
*/
-#include "xlator.h"
-#include "defaults.h"
+#include <glusterfs/xlator.h>
+#include <glusterfs/defaults.h>
#include "meta-mem-types.h"
#include "meta.h"
-#include "strfd.h"
+#include <glusterfs/strfd.h>
static int
xldump_options(dict_t *this, char *key, data_t *value, void *strfd)
diff --git a/xlators/meta/src/xlator-dir.c b/xlators/meta/src/xlator-dir.c
index 1715cbb56a0..86189715790 100644
--- a/xlators/meta/src/xlator-dir.c
+++ b/xlators/meta/src/xlator-dir.c
@@ -8,8 +8,8 @@
cases as published by the Free Software Foundation.
*/
-#include "xlator.h"
-#include "defaults.h"
+#include <glusterfs/xlator.h>
+#include <glusterfs/defaults.h>
#include "meta-mem-types.h"
#include "meta.h"
diff --git a/xlators/mgmt/glusterd/src/Makefile.am b/xlators/mgmt/glusterd/src/Makefile.am
index 6d09e37477c..685beb42d27 100644
--- a/xlators/mgmt/glusterd/src/Makefile.am
+++ b/xlators/mgmt/glusterd/src/Makefile.am
@@ -5,31 +5,34 @@ endif
xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/mgmt
glusterd_la_CPPFLAGS = $(AM_CPPFLAGS) \
-DFILTERDIR=\"$(libdir)/glusterfs/$(PACKAGE_VERSION)/filter\" \
- -DXLATORDIR=\"$(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator\"
+ -DXLATORDIR=\"$(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator\" \
+ -I$(top_srcdir)/libglusterd/src/
+
glusterd_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS)
glusterd_la_SOURCES = glusterd.c glusterd-handler.c glusterd-sm.c \
glusterd-op-sm.c glusterd-utils.c glusterd-rpc-ops.c \
glusterd-store.c glusterd-handshake.c glusterd-pmap.c \
glusterd-volgen.c glusterd-rebalance.c \
glusterd-quota.c glusterd-bitrot.c glusterd-geo-rep.c \
- glusterd-replace-brick.c glusterd-log-ops.c glusterd-tier.c \
+ glusterd-replace-brick.c glusterd-log-ops.c \
glusterd-volume-ops.c glusterd-brick-ops.c glusterd-mountbroker.c \
glusterd-syncop.c glusterd-hooks.c glusterd-volume-set.c \
glusterd-locks.c glusterd-snapshot.c glusterd-mgmt-handler.c \
glusterd-mgmt.c glusterd-peer-utils.c glusterd-statedump.c \
glusterd-snapshot-utils.c glusterd-conn-mgmt.c \
- glusterd-proc-mgmt.c glusterd-svc-mgmt.c glusterd-shd-svc.c \
+ glusterd-proc-mgmt.c glusterd-svc-mgmt.c \
glusterd-nfs-svc.c glusterd-quotad-svc.c glusterd-svc-helper.c \
glusterd-conn-helper.c glusterd-snapd-svc.c glusterd-snapd-svc-helper.c \
glusterd-bitd-svc.c glusterd-scrub-svc.c glusterd-server-quorum.c \
- glusterd-reset-brick.c glusterd-tierd-svc.c glusterd-tierd-svc-helper.c \
- glusterd-gfproxyd-svc.c glusterd-gfproxyd-svc-helper.c
-
+ glusterd-reset-brick.c glusterd-shd-svc.c glusterd-shd-svc-helper.c \
+ glusterd-gfproxyd-svc.c glusterd-gfproxyd-svc-helper.c glusterd-ganesha.c \
+ $(CONTRIBDIR)/mount/mntent.c
glusterd_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la \
+ $(top_builddir)/libglusterd/src/libglusterd.la \
$(top_builddir)/rpc/xdr/src/libgfxdr.la \
$(top_builddir)/rpc/rpc-lib/src/libgfrpc.la \
- $(XML_LIBS) -lcrypto $(URCU_LIBS) $(URCU_CDS_LIBS)
+ $(XML_LIBS) -lcrypto $(URCU_LIBS) $(URCU_CDS_LIBS) $(LIB_DL) $(GF_XLATOR_MGNT_LIBADD)
noinst_HEADERS = glusterd.h glusterd-utils.h glusterd-op-sm.h \
glusterd-sm.h glusterd-store.h glusterd-mem-types.h \
@@ -38,13 +41,14 @@ noinst_HEADERS = glusterd.h glusterd-utils.h glusterd-op-sm.h \
glusterd-mgmt.h glusterd-messages.h glusterd-peer-utils.h \
glusterd-statedump.h glusterd-snapshot-utils.h glusterd-geo-rep.h \
glusterd-conn-mgmt.h glusterd-conn-helper.h glusterd-proc-mgmt.h \
- glusterd-svc-mgmt.h glusterd-shd-svc.h glusterd-nfs-svc.h \
+ glusterd-svc-mgmt.h glusterd-nfs-svc.h \
glusterd-quotad-svc.h glusterd-svc-helper.h glusterd-snapd-svc.h \
glusterd-snapd-svc-helper.h glusterd-rcu.h glusterd-bitd-svc.h \
glusterd-scrub-svc.h glusterd-server-quorum.h glusterd-errno.h \
- glusterd-tierd-svc.h glusterd-tierd-svc-helper.h \
+ glusterd-shd-svc.h glusterd-shd-svc-helper.h \
glusterd-gfproxyd-svc.h glusterd-gfproxyd-svc-helper.h \
- $(CONTRIBDIR)/userspace-rcu/rculist-extra.h
+ $(CONTRIBDIR)/userspace-rcu/rculist-extra.h \
+ $(CONTRIBDIR)/mount/mntent_compat.h
AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \
-I$(top_srcdir)/rpc/xdr/src -I$(top_builddir)/rpc/xdr/src \
@@ -52,7 +56,10 @@ AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \
-I$(CONTRIBDIR)/mount -I$(CONTRIBDIR)/userspace-rcu \
-DSBIN_DIR=\"$(sbindir)\" -DDATADIR=\"$(localstatedir)\" \
-DGSYNCD_PREFIX=\"$(GLUSTERFS_LIBEXECDIR)\" \
- -DSYNCDAEMON_COMPILE=$(SYNCDAEMON_COMPILE)
+ -DCONFDIR=\"$(localstatedir)/run/gluster/shared_storage/nfs-ganesha\" \
+ -DGANESHA_PREFIX=\"$(libexecdir)/ganesha\" \
+ -DSYNCDAEMON_COMPILE=$(SYNCDAEMON_COMPILE) \
+ -I$(top_srcdir)/libglusterd/src/
AM_CFLAGS = -Wall $(GF_CFLAGS) $(URCU_CFLAGS) $(URCU_CDS_CFLAGS) $(XML_CFLAGS)
diff --git a/xlators/mgmt/glusterd/src/glusterd-bitd-svc.c b/xlators/mgmt/glusterd/src/glusterd-bitd-svc.c
index c8e30a5682f..6adb799b18f 100644
--- a/xlators/mgmt/glusterd/src/glusterd-bitd-svc.c
+++ b/xlators/mgmt/glusterd/src/glusterd-bitd-svc.c
@@ -8,8 +8,8 @@
cases as published by the Free Software Foundation.
*/
-#include "globals.h"
-#include "run.h"
+#include <glusterfs/globals.h>
+#include <glusterfs/run.h>
#include "glusterd.h"
#include "glusterd-utils.h"
#include "glusterd-volgen.h"
@@ -201,6 +201,6 @@ manager:
ret = priv->bitd_svc.manager(&(priv->bitd_svc), NULL, PROC_START_NO_WAIT);
out:
- gf_msg_debug(this->name, 0, "Returning %d", ret);
+ gf_msg_debug(this ? this->name : "glusterd", 0, "Returning %d", ret);
return ret;
}
diff --git a/xlators/mgmt/glusterd/src/glusterd-bitrot.c b/xlators/mgmt/glusterd/src/glusterd-bitrot.c
index 0608badb91d..37429fe9214 100644
--- a/xlators/mgmt/glusterd/src/glusterd-bitrot.c
+++ b/xlators/mgmt/glusterd/src/glusterd-bitrot.c
@@ -8,7 +8,7 @@
cases as published by the Free Software Foundation.
*/
-#include "common-utils.h"
+#include <glusterfs/common-utils.h>
#include "cli1-xdr.h"
#include "xdr-generic.h"
#include "glusterd.h"
@@ -16,10 +16,10 @@
#include "glusterd-store.h"
#include "glusterd-utils.h"
#include "glusterd-volgen.h"
-#include "run.h"
-#include "syscall.h"
-#include "byte-order.h"
-#include "compat-errno.h"
+#include <glusterfs/run.h>
+#include <glusterfs/syscall.h>
+#include <glusterfs/byte-order.h>
+#include <glusterfs/compat-errno.h>
#include "glusterd-scrub-svc.h"
#include "glusterd-messages.h"
@@ -34,6 +34,7 @@ const char *gd_bitrot_op_list[GF_BITROT_OPTION_TYPE_MAX] = {
[GF_BITROT_OPTION_TYPE_SCRUB_FREQ] = "scrub-frequency",
[GF_BITROT_OPTION_TYPE_SCRUB] = "scrub",
[GF_BITROT_OPTION_TYPE_EXPIRY_TIME] = "expiry-time",
+ [GF_BITROT_OPTION_TYPE_SIGNER_THREADS] = "signer-threads",
};
int
@@ -319,7 +320,7 @@ glusterd_bitrot_expiry_time(glusterd_volinfo_t *volinfo, dict_t *dict,
int32_t ret = -1;
uint32_t expiry_time = 0;
xlator_t *this = NULL;
- char dkey[1024] = {
+ char dkey[32] = {
0,
};
@@ -354,6 +355,81 @@ out:
return ret;
}
+static gf_boolean_t
+is_bitd_configure_noop(xlator_t *this, glusterd_volinfo_t *volinfo)
+{
+ gf_boolean_t noop = _gf_true;
+ glusterd_brickinfo_t *brickinfo = NULL;
+
+ if (!glusterd_is_bitrot_enabled(volinfo))
+ goto out;
+ else if (volinfo->status != GLUSTERD_STATUS_STARTED)
+ goto out;
+ else {
+ cds_list_for_each_entry(brickinfo, &volinfo->bricks, brick_list)
+ {
+ if (!glusterd_is_local_brick(this, volinfo, brickinfo))
+ continue;
+ noop = _gf_false;
+ return noop;
+ }
+ }
+out:
+ return noop;
+}
+
+static int
+glusterd_bitrot_signer_threads(glusterd_volinfo_t *volinfo, dict_t *dict,
+ char *key, char **op_errstr)
+{
+ int32_t ret = -1;
+ uint32_t signer_th_count = 0;
+ uint32_t existing_th_count = 0;
+ xlator_t *this = NULL;
+ glusterd_conf_t *priv = NULL;
+ char dkey[32] = {
+ 0,
+ };
+
+ this = THIS;
+ GF_ASSERT(this);
+
+ priv = this->private;
+ GF_VALIDATE_OR_GOTO(this->name, priv, out);
+
+ ret = dict_get_uint32(dict, "signer-threads", &signer_th_count);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
+ "Unable to get bitrot signer thread count.");
+ goto out;
+ }
+
+ ret = dict_get_uint32(volinfo->dict, key, &existing_th_count);
+ if (ret == 0 && signer_th_count == existing_th_count) {
+ goto out;
+ }
+
+ snprintf(dkey, sizeof(dkey), "%d", signer_th_count);
+ ret = dict_set_dynstr_with_alloc(volinfo->dict, key, dkey);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Failed to set option %s", key);
+ goto out;
+ }
+
+ if (!is_bitd_configure_noop(this, volinfo)) {
+ ret = priv->bitd_svc.manager(&(priv->bitd_svc), NULL,
+ PROC_START_NO_WAIT);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BITDSVC_RECONF_FAIL,
+ "Failed to reconfigure bitrot services");
+ goto out;
+ }
+ }
+out:
+ return ret;
+}
+
static int
glusterd_bitrot_enable(glusterd_volinfo_t *volinfo, char **op_errstr)
{
@@ -594,6 +670,15 @@ glusterd_op_bitrot(dict_t *dict, char **op_errstr, dict_t *rsp_dict)
volinfo, dict, "features.expiry-time", op_errstr);
if (ret)
goto out;
+ break;
+
+ case GF_BITROT_OPTION_TYPE_SIGNER_THREADS:
+ ret = glusterd_bitrot_signer_threads(
+ volinfo, dict, "features.signer-threads", op_errstr);
+ if (ret)
+ goto out;
+ break;
+
case GF_BITROT_CMD_SCRUB_STATUS:
case GF_BITROT_CMD_SCRUB_ONDEMAND:
break;
diff --git a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c
index f64237c4e18..e56cd0e6c74 100644
--- a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c
+++ b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c
@@ -7,7 +7,7 @@
later), or the GNU General Public License, version 2 (GPLv2), in all
cases as published by the Free Software Foundation.
*/
-#include "common-utils.h"
+#include <glusterfs/common-utils.h>
#include "cli1-xdr.h"
#include "xdr-generic.h"
#include "glusterd.h"
@@ -20,42 +20,12 @@
#include "glusterd-svc-helper.h"
#include "glusterd-messages.h"
#include "glusterd-server-quorum.h"
-#include "run.h"
-#include "glusterd-volgen.h"
-#include "syscall.h"
+#include <glusterfs/run.h>
+#include <glusterfs/syscall.h>
#include <sys/signal.h>
/* misc */
-gf_boolean_t
-glusterd_is_tiering_supported(char *op_errstr)
-{
- xlator_t *this = NULL;
- glusterd_conf_t *conf = NULL;
- gf_boolean_t supported = _gf_false;
-
- this = THIS;
- GF_VALIDATE_OR_GOTO("glusterd", this, out);
-
- conf = this->private;
- GF_VALIDATE_OR_GOTO(this->name, conf, out);
-
- if (conf->op_version < GD_OP_VERSION_3_7_0)
- goto out;
-
- supported = _gf_true;
-
-out:
- if (!supported && op_errstr != NULL && conf)
- sprintf(op_errstr,
- "Tier operation failed. The cluster is "
- "operating at version %d. Tiering"
- " is unavailable in this version.",
- conf->op_version);
-
- return supported;
-}
-
/* In this function, we decide, based on the 'count' of the brick,
where to add it in the current volume. 'count' tells us already
how many of the given bricks are added. other argument are self-
@@ -212,20 +182,19 @@ gd_rmbr_validate_replica_count(glusterd_volinfo_t *volinfo,
{
int ret = -1;
int replica_nodes = 0;
+ xlator_t *this = NULL;
+ this = THIS;
+ GF_ASSERT(this);
switch (volinfo->type) {
- case GF_CLUSTER_TYPE_TIER:
- ret = 1;
- goto out;
-
case GF_CLUSTER_TYPE_NONE:
case GF_CLUSTER_TYPE_DISPERSE:
snprintf(err_str, err_len,
"replica count (%d) option given for non replicate "
"volume %s",
replica_count, volinfo->volname);
- gf_msg(THIS->name, GF_LOG_WARNING, 0, GD_MSG_VOL_NOT_REPLICA, "%s",
- err_str);
+ gf_smsg(this->name, GF_LOG_WARNING, EINVAL, GD_MSG_INVALID_ARGUMENT,
+ err_str, NULL);
goto out;
case GF_CLUSTER_TYPE_REPLICATE:
@@ -236,8 +205,8 @@ gd_rmbr_validate_replica_count(glusterd_volinfo_t *volinfo,
"than volume %s's replica count (%d)",
replica_count, volinfo->volname,
volinfo->replica_count);
- gf_msg(THIS->name, GF_LOG_WARNING, EINVAL, GD_MSG_INVALID_ENTRY,
- "%s", err_str);
+ gf_smsg(this->name, GF_LOG_WARNING, EINVAL,
+ GD_MSG_INVALID_ARGUMENT, err_str, NULL);
goto out;
}
if (replica_count == volinfo->replica_count) {
@@ -251,8 +220,8 @@ gd_rmbr_validate_replica_count(glusterd_volinfo_t *volinfo,
"(or %dxN)",
brick_count, volinfo->dist_leaf_count,
volinfo->dist_leaf_count);
- gf_msg(THIS->name, GF_LOG_WARNING, EINVAL,
- GD_MSG_INVALID_ENTRY, "%s", err_str);
+ gf_smsg(this->name, GF_LOG_WARNING, EINVAL,
+ GD_MSG_INVALID_ARGUMENT, err_str, NULL);
goto out;
}
ret = 1;
@@ -267,6 +236,8 @@ gd_rmbr_validate_replica_count(glusterd_volinfo_t *volinfo,
"need %d(xN) bricks for reducing replica "
"count of the volume from %d to %d",
replica_nodes, volinfo->replica_count, replica_count);
+ gf_smsg(this->name, GF_LOG_WARNING, EINVAL,
+ GD_MSG_INVALID_ARGUMENT, err_str, NULL);
goto out;
}
break;
@@ -316,6 +287,7 @@ __glusterd_handle_add_brick(rpcsvc_request_t *req)
// failed to decode msg;
req->rpc_err = GARBAGE_ARGS;
snprintf(err_str, sizeof(err_str), "Garbage args received");
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_GARBAGE_ARGS, NULL);
goto out;
}
@@ -351,10 +323,13 @@ __glusterd_handle_add_brick(rpcsvc_request_t *req)
goto out;
}
- if (!(ret = glusterd_check_volume_exists(volname))) {
- ret = -1;
- snprintf(err_str, sizeof(err_str), "Volume %s does not exist", volname);
- gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_VOL_NOT_FOUND, "%s",
+ ret = glusterd_volinfo_find(volname, &volinfo);
+ if (ret) {
+ snprintf(err_str, sizeof(err_str),
+ "Unable to get volinfo "
+ "for volume name %s",
+ volname);
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLINFO_GET_FAIL, "%s",
err_str);
goto out;
}
@@ -396,57 +371,8 @@ __glusterd_handle_add_brick(rpcsvc_request_t *req)
goto out;
}
- ret = glusterd_volinfo_find(volname, &volinfo);
- if (ret) {
- snprintf(err_str, sizeof(err_str),
- "Unable to get volinfo "
- "for volume name %s",
- volname);
- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLINFO_GET_FAIL, "%s",
- err_str);
- goto out;
- }
-
total_bricks = volinfo->brick_count + brick_count;
- if (dict_getn(dict, "attach-tier", SLEN("attach-tier"))) {
- if (volinfo->type == GF_CLUSTER_TYPE_TIER) {
- snprintf(err_str, sizeof(err_str), "Volume %s is already a tier.",
- volname);
- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOL_ALREADY_TIER, "%s",
- err_str);
- ret = -1;
- goto out;
- }
-
- if (glusterd_is_tiering_supported(err_str) == _gf_false) {
- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VERSION_UNSUPPORTED,
- "Tiering not supported at this version");
- ret = -1;
- goto out;
- }
-
- ret = dict_get_int32n(dict, "hot-type", SLEN("hot-type"), &type);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
- "failed to get type from dictionary");
- goto out;
- }
-
- goto brick_val;
- }
-
- ret = glusterd_disallow_op_for_tier(volinfo, GD_OP_ADD_BRICK, -1);
- if (ret) {
- snprintf(err_str, sizeof(err_str),
- "Add-brick operation is "
- "not supported on a tiered volume %s",
- volname);
- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_OP_UNSUPPORTED, "%s",
- err_str);
- goto out;
- }
-
if (!stripe_count && !replica_count) {
if (volinfo->type == GF_CLUSTER_TYPE_NONE)
goto brick_val;
@@ -589,6 +515,8 @@ subvol_matcher_verify(int *subvols, glusterd_volinfo_t *volinfo, char *err_str,
int i = 0;
int ret = 0;
int count = volinfo->replica_count - replica_count;
+ xlator_t *this = THIS;
+ GF_ASSERT(this);
if (replica_count && subvols) {
for (i = 0; i < volinfo->subvol_count; i++) {
@@ -598,6 +526,8 @@ subvol_matcher_verify(int *subvols, glusterd_volinfo_t *volinfo, char *err_str,
"Remove exactly %d"
" brick(s) from each subvolume.",
count);
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ GD_MSG_BRICK_SUBVOL_VERIFY_FAIL, err_str, NULL);
break;
}
}
@@ -611,6 +541,8 @@ subvol_matcher_verify(int *subvols, glusterd_volinfo_t *volinfo, char *err_str,
ret = -1;
snprintf(err_str, err_len, "Bricks not from same subvol for %s",
vol_type);
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ GD_MSG_BRICK_SUBVOL_VERIFY_FAIL, err_str, NULL);
break;
}
} while (++i < volinfo->subvol_count);
@@ -624,43 +556,6 @@ subvol_matcher_destroy(int *subvols)
GF_FREE(subvols);
}
-int
-glusterd_set_detach_bricks(dict_t *dict, glusterd_volinfo_t *volinfo)
-{
- char key[64] = "";
- char value[2048] = ""; /* hostname + path */
- int brick_num = 0;
- int hot_brick_num = 0;
- glusterd_brickinfo_t *brickinfo;
- int ret = 0;
- int32_t len = 0;
-
- /* cold tier bricks at tail of list so use reverse iteration */
- cds_list_for_each_entry_reverse(brickinfo, &volinfo->bricks, brick_list)
- {
- brick_num++;
- if (brick_num > volinfo->tier_info.cold_brick_count) {
- hot_brick_num++;
- sprintf(key, "brick%d", hot_brick_num);
- len = snprintf(value, sizeof(value), "%s:%s", brickinfo->hostname,
- brickinfo->path);
- if ((len < 0) || (len >= sizeof(value))) {
- return -1;
- }
-
- ret = dict_set_str(dict, key, strdup(value));
- if (ret)
- break;
- }
- }
-
- ret = dict_set_int32n(dict, "count", SLEN("count"), hot_brick_num);
- if (ret)
- return -1;
-
- return hot_brick_num;
-}
-
static int
glusterd_remove_brick_validate_arbiters(glusterd_volinfo_t *volinfo,
int32_t count, int32_t replica_count,
@@ -672,6 +567,9 @@ glusterd_remove_brick_validate_arbiters(glusterd_volinfo_t *volinfo,
glusterd_brickinfo_t *brickinfo = NULL;
glusterd_brickinfo_t *last = NULL;
char *arbiter_array = NULL;
+ xlator_t *this = NULL;
+ this = THIS;
+ GF_ASSERT(this);
if (volinfo->type != GF_CLUSTER_TYPE_REPLICATE)
goto out;
@@ -690,6 +588,8 @@ glusterd_remove_brick_validate_arbiters(glusterd_volinfo_t *volinfo,
"Remove arbiter "
"brick(s) only when converting from "
"arbiter to replica 2 subvolume.");
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ GD_MSG_REMOVE_ARBITER_BRICK, err_str, NULL);
ret = -1;
goto out;
}
@@ -713,7 +613,9 @@ glusterd_remove_brick_validate_arbiters(glusterd_volinfo_t *volinfo,
snprintf(err_str, err_len,
"Removed bricks "
"must contain arbiter when converting"
- " to plain distrubute.");
+ " to plain distribute.");
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ GD_MSG_REMOVE_ARBITER_BRICK, err_str, NULL);
ret = -1;
break;
}
@@ -737,6 +639,7 @@ __glusterd_handle_remove_brick(rpcsvc_request_t *req)
char key[64] = "";
int keylen;
int i = 1;
+ glusterd_conf_t *conf = NULL;
glusterd_volinfo_t *volinfo = NULL;
glusterd_brickinfo_t *brickinfo = NULL;
glusterd_brickinfo_t **brickinfo_list = NULL;
@@ -755,12 +658,15 @@ __glusterd_handle_remove_brick(rpcsvc_request_t *req)
GF_ASSERT(req);
this = THIS;
GF_ASSERT(this);
+ conf = this->private;
+ GF_ASSERT(conf);
ret = xdr_to_generic(req->msg[0], &cli_req, (xdrproc_t)xdr_gf_cli_req);
if (ret < 0) {
// failed to decode msg;
req->rpc_err = GARBAGE_ARGS;
snprintf(err_str, sizeof(err_str), "Received garbage args");
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_GARBAGE_ARGS, NULL);
goto out;
}
@@ -813,14 +719,6 @@ __glusterd_handle_remove_brick(rpcsvc_request_t *req)
goto out;
}
- if ((volinfo->type == GF_CLUSTER_TYPE_TIER) &&
- (glusterd_is_tiering_supported(err_str) == _gf_false)) {
- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VERSION_UNSUPPORTED,
- "Tiering not supported at this version");
- ret = -1;
- goto out;
- }
-
ret = dict_get_int32n(dict, "command", SLEN("command"), &cmd);
if (ret) {
snprintf(err_str, sizeof(err_str),
@@ -831,15 +729,6 @@ __glusterd_handle_remove_brick(rpcsvc_request_t *req)
goto out;
}
- ret = glusterd_disallow_op_for_tier(volinfo, GD_OP_REMOVE_BRICK, cmd);
- if (ret) {
- snprintf(err_str, sizeof(err_str),
- "Removing brick from a Tier volume is not allowed");
- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_OP_UNSUPPORTED, "%s",
- err_str);
- goto out;
- }
-
ret = dict_get_int32n(dict, "replica-count", SLEN("replica-count"),
&replica_count);
if (!ret) {
@@ -891,8 +780,7 @@ __glusterd_handle_remove_brick(rpcsvc_request_t *req)
/* Do not allow remove-brick if the bricks given is less than
the replica count or stripe count */
- if (!replica_count && (volinfo->type != GF_CLUSTER_TYPE_NONE) &&
- (volinfo->type != GF_CLUSTER_TYPE_TIER)) {
+ if (!replica_count && (volinfo->type != GF_CLUSTER_TYPE_NONE)) {
if (volinfo->dist_leaf_count && (count % volinfo->dist_leaf_count)) {
snprintf(err_str, sizeof(err_str),
"Remove brick "
@@ -905,18 +793,13 @@ __glusterd_handle_remove_brick(rpcsvc_request_t *req)
}
}
- /* subvol match is not required for tiered volume*/
if ((volinfo->type != GF_CLUSTER_TYPE_NONE) &&
- (volinfo->type != GF_CLUSTER_TYPE_TIER) &&
(volinfo->subvol_count > 1)) {
ret = subvol_matcher_init(&subvols, volinfo->subvol_count);
if (ret)
goto out;
}
- if (volinfo->type == GF_CLUSTER_TYPE_TIER)
- count = glusterd_set_detach_bricks(dict, volinfo);
-
brickinfo_list = GF_CALLOC(count, sizeof(*brickinfo_list),
gf_common_mt_pointer);
if (!brickinfo_list) {
@@ -957,18 +840,10 @@ __glusterd_handle_remove_brick(rpcsvc_request_t *req)
(volinfo->brick_count <= volinfo->dist_leaf_count))
continue;
- /* Find which subvolume the brick belongs to.
- * subvol match is not required for tiered volume
- *
- */
- if (volinfo->type != GF_CLUSTER_TYPE_TIER)
- subvol_matcher_update(subvols, volinfo, brickinfo);
+ subvol_matcher_update(subvols, volinfo, brickinfo);
}
- /* Check if the bricks belong to the same subvolumes.*/
- /* subvol match is not required for tiered volume*/
if ((volinfo->type != GF_CLUSTER_TYPE_NONE) &&
- (volinfo->type != GF_CLUSTER_TYPE_TIER) &&
(volinfo->subvol_count > 1)) {
ret = subvol_matcher_verify(subvols, volinfo, err_str, sizeof(err_str),
vol_type, replica_count);
@@ -982,7 +857,17 @@ __glusterd_handle_remove_brick(rpcsvc_request_t *req)
if (ret)
goto out;
- ret = glusterd_op_begin_synctask(req, GD_OP_REMOVE_BRICK, dict);
+ if (conf->op_version < GD_OP_VERSION_8_0) {
+ gf_msg_debug(this->name, 0,
+ "The cluster is operating at "
+ "version less than %d. remove-brick operation"
+ "falling back to syncop framework.",
+ GD_OP_VERSION_8_0);
+ ret = glusterd_op_begin_synctask(req, GD_OP_REMOVE_BRICK, dict);
+ } else {
+ ret = glusterd_mgmt_v3_initiate_all_phases(req, GD_OP_REMOVE_BRICK,
+ dict);
+ }
out:
if (ret) {
@@ -1132,13 +1017,13 @@ glusterd_op_perform_add_bricks(glusterd_volinfo_t *volinfo, int32_t count,
0,
};
gf_boolean_t restart_needed = 0;
- int caps = 0;
int brickid = 0;
char key[64] = "";
char *brick_mount_dir = NULL;
xlator_t *this = NULL;
glusterd_conf_t *conf = NULL;
gf_boolean_t is_valid_add_brick = _gf_false;
+ gf_boolean_t restart_shd = _gf_false;
struct statvfs brickstat = {
0,
};
@@ -1226,10 +1111,7 @@ glusterd_op_perform_add_bricks(glusterd_volinfo_t *volinfo, int32_t count,
}
brickinfo->statfs_fsid = brickstat.f_fsid;
}
- /* hot tier bricks are added to head of brick list */
- if (dict_getn(dict, "attach-tier", SLEN("attach-tier"))) {
- cds_list_add(&brickinfo->brick_list, &volinfo->bricks);
- } else if (stripe_count || replica_count) {
+ if (stripe_count || replica_count) {
add_brick_at_right_order(brickinfo, volinfo, (i - 1), stripe_count,
replica_count);
} else {
@@ -1294,22 +1176,19 @@ glusterd_op_perform_add_bricks(glusterd_volinfo_t *volinfo, int32_t count,
if (count)
brick = strtok_r(brick_list + 1, " \n", &saveptr);
-#ifdef HAVE_BD_XLATOR
- if (brickinfo->vg[0])
- caps = CAPS_BD | CAPS_THIN | CAPS_OFFLOAD_COPY | CAPS_OFFLOAD_SNAPSHOT;
-#endif
- /* This check needs to be added to distinguish between
- * attach-tier commands and add-brick commands.
- * When a tier is attached, adding is done via add-brick
- * and setting of pending xattrs shouldn't be done for
- * attach-tiers as they are virtually new volumes.
- */
if (glusterd_is_volume_replicate(volinfo)) {
- if (replica_count &&
- !dict_getn(dict, "attach-tier", SLEN("attach-tier")) &&
- conf->op_version >= GD_OP_VERSION_3_7_10) {
+ if (replica_count && conf->op_version >= GD_OP_VERSION_3_7_10) {
is_valid_add_brick = _gf_true;
+ if (volinfo->status == GLUSTERD_STATUS_STARTED) {
+ ret = volinfo->shd.svc.stop(&(volinfo->shd.svc), SIGTERM);
+ if (ret) {
+ gf_msg("glusterd", GF_LOG_ERROR, 0,
+ GD_MSG_GLUSTER_SERVICES_STOP_FAIL,
+ "Failed to stop shd for %s.", volinfo->volname);
+ }
+ restart_shd = _gf_true;
+ }
ret = generate_dummy_client_volfiles(volinfo);
if (ret) {
gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_VOLFILE_CREATE_FAIL,
@@ -1324,22 +1203,6 @@ glusterd_op_perform_add_bricks(glusterd_volinfo_t *volinfo, int32_t count,
_gf_true);
if (ret)
goto out;
-#ifdef HAVE_BD_XLATOR
- char msg[1024] = "";
- /* Check for VG/thin pool if its BD volume */
- if (brickinfo->vg[0]) {
- ret = glusterd_is_valid_vg(brickinfo, 0, msg);
- if (ret) {
- gf_msg(THIS->name, GF_LOG_CRITICAL, 0, GD_MSG_INVALID_VG, "%s",
- msg);
- goto out;
- }
- /* if anyone of the brick does not have thin support,
- disable it for entire volume */
- caps &= brickinfo->caps;
- } else
- caps = 0;
-#endif
if (gf_uuid_is_null(brickinfo->uuid)) {
ret = glusterd_resolve_brick(brickinfo);
@@ -1384,7 +1247,6 @@ glusterd_op_perform_add_bricks(glusterd_volinfo_t *volinfo, int32_t count,
dict_foreach(volinfo->gsync_slaves, _glusterd_restart_gsync_session,
&param);
}
- volinfo->caps = caps;
generate_volfiles:
if (conf->op_version <= GD_OP_VERSION_3_7_5) {
@@ -1401,6 +1263,14 @@ generate_volfiles:
out:
GF_FREE(free_ptr1);
GF_FREE(free_ptr2);
+ if (restart_shd) {
+ if (volinfo->shd.svc.manager(&(volinfo->shd.svc), volinfo,
+ PROC_START_NO_WAIT)) {
+ gf_msg("glusterd", GF_LOG_CRITICAL, 0,
+ GD_MSG_GLUSTER_SERVICE_START_FAIL,
+ "Failed to start shd for %s.", volinfo->volname);
+ }
+ }
gf_msg_debug("glusterd", 0, "Returning %d", ret);
return ret;
@@ -1489,14 +1359,14 @@ glusterd_op_stage_add_brick(dict_t *dict, char **op_errstr, dict_t *rsp_dict)
ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname);
if (ret) {
- gf_msg(THIS->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
+ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
"Unable to get volume name");
goto out;
}
ret = glusterd_volinfo_find(volname, &volinfo);
if (ret) {
- gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_VOL_NOT_FOUND,
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOL_NOT_FOUND,
"Unable to find volume: %s", volname);
goto out;
}
@@ -1508,13 +1378,7 @@ glusterd_op_stage_add_brick(dict_t *dict, char **op_errstr, dict_t *rsp_dict)
ret = dict_get_int32n(dict, "replica-count", SLEN("replica-count"),
&replica_count);
if (ret) {
- gf_msg_debug(THIS->name, 0, "Unable to get replica count");
- }
-
- ret = dict_get_int32n(dict, "arbiter-count", SLEN("arbiter-count"),
- &arbiter_count);
- if (ret) {
- gf_msg_debug(THIS->name, 0, "No arbiter count present in the dict");
+ gf_msg_debug(this->name, 0, "Unable to get replica count");
}
if (replica_count > 0) {
@@ -1528,19 +1392,20 @@ glusterd_op_stage_add_brick(dict_t *dict, char **op_errstr, dict_t *rsp_dict)
}
}
- if (glusterd_is_volume_replicate(volinfo)) {
+ glusterd_add_peers_to_auth_list(volname);
+
+ if (replica_count && glusterd_is_volume_replicate(volinfo)) {
/* Do not allow add-brick for stopped volumes when replica-count
* is being increased.
*/
- if (conf->op_version >= GD_OP_VERSION_3_7_10 &&
- !dict_getn(dict, "attach-tier", SLEN("attach-tier")) &&
- replica_count && GLUSTERD_STATUS_STOPPED == volinfo->status) {
+ if (GLUSTERD_STATUS_STOPPED == volinfo->status &&
+ conf->op_version >= GD_OP_VERSION_3_7_10) {
ret = -1;
snprintf(msg, sizeof(msg),
" Volume must not be in"
" stopped state when replica-count needs to "
" be increased.");
- gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_BRICK_ADD_FAIL, "%s",
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BRICK_ADD_FAIL, "%s",
msg);
*op_errstr = gf_strdup(msg);
goto out;
@@ -1548,25 +1413,31 @@ glusterd_op_stage_add_brick(dict_t *dict, char **op_errstr, dict_t *rsp_dict)
/* op-version check for replica 2 to arbiter conversion. If we
* don't have this check, an older peer added as arbiter brick
* will not have the arbiter xlator in its volfile. */
- if ((conf->op_version < GD_OP_VERSION_3_8_0) && (arbiter_count == 1) &&
- (replica_count == 3)) {
- ret = -1;
- snprintf(msg, sizeof(msg),
- "Cluster op-version must "
- "be >= 30800 to add arbiter brick to a "
- "replica 2 volume.");
- gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_BRICK_ADD_FAIL, "%s",
- msg);
- *op_errstr = gf_strdup(msg);
- goto out;
+ if ((replica_count == 3) && (conf->op_version < GD_OP_VERSION_3_8_0)) {
+ ret = dict_get_int32n(dict, "arbiter-count", SLEN("arbiter-count"),
+ &arbiter_count);
+ if (ret) {
+ gf_msg_debug(this->name, 0,
+ "No arbiter count present in the dict");
+ } else if (arbiter_count == 1) {
+ ret = -1;
+ snprintf(msg, sizeof(msg),
+ "Cluster op-version must "
+ "be >= 30800 to add arbiter brick to a "
+ "replica 2 volume.");
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BRICK_ADD_FAIL, "%s",
+ msg);
+ *op_errstr = gf_strdup(msg);
+ goto out;
+ }
}
/* Do not allow increasing replica count for arbiter volumes. */
- if (replica_count && volinfo->arbiter_count) {
+ if (volinfo->arbiter_count) {
ret = -1;
snprintf(msg, sizeof(msg),
"Increasing replica count "
"for arbiter volumes is not supported.");
- gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_BRICK_ADD_FAIL, "%s",
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BRICK_ADD_FAIL, "%s",
msg);
*op_errstr = gf_strdup(msg);
goto out;
@@ -1575,6 +1446,43 @@ glusterd_op_stage_add_brick(dict_t *dict, char **op_errstr, dict_t *rsp_dict)
is_force = dict_get_str_boolean(dict, "force", _gf_false);
+ /* If the volume type is replicate or disperse and "force" was not
+ * given at the end of the command, check the brick order.
+ * Doing this check on the originator node is sufficient.
+ */
+
+ if (!is_force && is_origin_glusterd(dict)) {
+ ret = 0;
+ if (volinfo->type == GF_CLUSTER_TYPE_REPLICATE) {
+ gf_msg_debug(this->name, 0,
+ "Replicate cluster type "
+ "found. Checking brick order.");
+ if (replica_count)
+ ret = glusterd_check_brick_order(dict, msg, volinfo->type,
+ &volname, &bricks, &count,
+ replica_count);
+ else
+ ret = glusterd_check_brick_order(dict, msg, volinfo->type,
+ &volname, &bricks, &count,
+ volinfo->replica_count);
+ } else if (volinfo->type == GF_CLUSTER_TYPE_DISPERSE) {
+ gf_msg_debug(this->name, 0,
+ "Disperse cluster type"
+ " found. Checking brick order.");
+ ret = glusterd_check_brick_order(dict, msg, volinfo->type, &volname,
+ &bricks, &count,
+ volinfo->disperse_count);
+ }
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BAD_BRKORDER,
+ "Not adding brick because of "
+ "bad brick order. %s",
+ msg);
+ *op_errstr = gf_strdup(msg);
+ goto out;
+ }
+ }
+
if (volinfo->replica_count < replica_count && !is_force) {
cds_list_for_each_entry(brickinfo, &volinfo->bricks, brick_list)
{
@@ -1591,7 +1499,7 @@ glusterd_op_stage_add_brick(dict_t *dict, char **op_errstr, dict_t *rsp_dict)
if (len < 0) {
strcpy(msg, "<error>");
}
- gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_BRICK_ADD_FAIL, "%s",
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BRICK_ADD_FAIL, "%s",
msg);
*op_errstr = gf_strdup(msg);
goto out;
@@ -1623,46 +1531,40 @@ glusterd_op_stage_add_brick(dict_t *dict, char **op_errstr, dict_t *rsp_dict)
"Volume name %s rebalance is in "
"progress. Please retry after completion",
volname);
- gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_OIP_RETRY_LATER, "%s", msg);
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_OIP_RETRY_LATER, "%s", msg);
*op_errstr = gf_strdup(msg);
ret = -1;
goto out;
}
- if (dict_getn(dict, "attach-tier", SLEN("attach-tier"))) {
- /*
- * This check is needed because of add/remove brick
- * is not supported on a tiered volume. So once a tier
- * is attached we cannot commit or stop the remove-brick
- * task. Please change this comment once we start supporting
- * add/remove brick on a tiered volume.
- */
- if (!gd_is_remove_brick_committed(volinfo)) {
- snprintf(msg, sizeof(msg),
- "An earlier remove-brick "
- "task exists for volume %s. Either commit it"
- " or stop it before attaching a tier.",
- volinfo->volname);
- gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_OLD_REMOVE_BRICK_EXISTS,
- "%s", msg);
- *op_errstr = gf_strdup(msg);
- ret = -1;
+ if (volinfo->snap_count > 0 || !cds_list_empty(&volinfo->snap_volumes)) {
+ snprintf(msg, sizeof(msg),
+ "Volume %s has %" PRIu64
+ " snapshots. "
+ "Changing the volume configuration will not effect snapshots."
+ "But the snapshot brick mount should be intact to "
+ "make them function.",
+ volname, volinfo->snap_count);
+ gf_msg("glusterd", GF_LOG_WARNING, 0, GD_MSG_SNAP_WARN, "%s", msg);
+ msg[0] = '\0';
+ }
+
+ if (!count) {
+ ret = dict_get_int32n(dict, "count", SLEN("count"), &count);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
+ "Unable to get count");
goto out;
}
}
- ret = dict_get_int32n(dict, "count", SLEN("count"), &count);
- if (ret) {
- gf_msg("glusterd", GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
- "Unable to get count");
- goto out;
- }
-
- ret = dict_get_strn(dict, "bricks", SLEN("bricks"), &bricks);
- if (ret) {
- gf_msg(THIS->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
- "Unable to get bricks");
- goto out;
+ if (!bricks) {
+ ret = dict_get_strn(dict, "bricks", SLEN("bricks"), &bricks);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
+ "Unable to get bricks");
+ goto out;
+ }
}
if (bricks) {
@@ -1681,7 +1583,7 @@ glusterd_op_stage_add_brick(dict_t *dict, char **op_errstr, dict_t *rsp_dict)
"brick path %s is "
"too long",
brick);
- gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_BRKPATH_TOO_LONG, "%s",
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BRKPATH_TOO_LONG, "%s",
msg);
*op_errstr = gf_strdup(msg);
@@ -1692,7 +1594,7 @@ glusterd_op_stage_add_brick(dict_t *dict, char **op_errstr, dict_t *rsp_dict)
ret = glusterd_brickinfo_new_from_brick(brick, &brickinfo, _gf_true,
NULL);
if (ret) {
- gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_BRICK_NOT_FOUND,
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BRICK_NOT_FOUND,
"Add-brick: Unable"
" to get brickinfo");
goto out;
@@ -1708,18 +1610,6 @@ glusterd_op_stage_add_brick(dict_t *dict, char **op_errstr, dict_t *rsp_dict)
}
if (!gf_uuid_compare(brickinfo->uuid, MY_UUID)) {
-#ifdef HAVE_BD_XLATOR
- if (brickinfo->vg[0]) {
- ret = glusterd_is_valid_vg(brickinfo, 1, msg);
- if (ret) {
- gf_msg(THIS->name, GF_LOG_ERROR, EINVAL, GD_MSG_INVALID_VG,
- "%s", msg);
- *op_errstr = gf_strdup(msg);
- goto out;
- }
- }
-#endif
-
ret = glusterd_validate_and_create_brickpath(
brickinfo, volinfo->volume_id, volinfo->volname, op_errstr,
is_force, _gf_false);
@@ -1774,7 +1664,7 @@ out:
GF_FREE(str_ret);
GF_FREE(all_bricks);
- gf_msg_debug(THIS->name, 0, "Returning %d", ret);
+ gf_msg_debug(this->name, 0, "Returning %d", ret);
return ret;
}
@@ -1798,6 +1688,8 @@ glusterd_remove_brick_validate_bricks(gf1_op_commands cmd, int32_t brick_count,
};
glusterd_conf_t *priv = THIS->private;
int pid = -1;
+ xlator_t *this = THIS;
+ GF_ASSERT(this);
/* Check whether all the nodes of the bricks to be removed are
* up, if not fail the operation */
@@ -1806,6 +1698,8 @@ glusterd_remove_brick_validate_bricks(gf1_op_commands cmd, int32_t brick_count,
ret = dict_get_strn(dict, key, keylen, &brick);
if (ret) {
snprintf(msg, sizeof(msg), "Unable to get %s", key);
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
+ "key=%s", key, NULL);
*errstr = gf_strdup(msg);
goto out;
}
@@ -1817,54 +1711,30 @@ glusterd_remove_brick_validate_bricks(gf1_op_commands cmd, int32_t brick_count,
"Incorrect brick "
"%s for volume %s",
brick, volinfo->volname);
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_INCORRECT_BRICK,
+ "Brick=%s, Volume=%s", brick, volinfo->volname, NULL);
*errstr = gf_strdup(msg);
goto out;
}
/* Do not allow commit if the bricks are not decommissioned
- * if its a remove brick commit or detach-tier commit
+ * if its a remove brick commit
*/
- if (!brickinfo->decommissioned) {
- if (cmd == GF_OP_CMD_COMMIT) {
- snprintf(msg, sizeof(msg),
- "Brick %s "
- "is not decommissioned. "
- "Use start or force option",
- brick);
- *errstr = gf_strdup(msg);
- ret = -1;
- goto out;
- }
-
- if (cmd == GF_OP_CMD_DETACH_COMMIT ||
- cmd_defrag == GF_DEFRAG_CMD_DETACH_COMMIT) {
- snprintf(msg, sizeof(msg),
- "Bricks in Hot "
- "tier are not decommissioned yet. Use "
- "gluster volume tier <VOLNAME> "
- "detach start to start the decommission process");
- *errstr = gf_strdup(msg);
- ret = -1;
- goto out;
- }
- } else {
- if ((cmd == GF_OP_CMD_DETACH_COMMIT ||
- (cmd_defrag == GF_DEFRAG_CMD_DETACH_COMMIT)) &&
- (volinfo->rebal.defrag_status == GF_DEFRAG_STATUS_STARTED)) {
- snprintf(msg, sizeof(msg),
- "Bricks in Hot "
- "tier are not decommissioned yet. Wait for "
- "the detach to complete using gluster volume "
- "tier <VOLNAME> status.");
- *errstr = gf_strdup(msg);
- ret = -1;
- goto out;
- }
+ if (!brickinfo->decommissioned && cmd == GF_OP_CMD_COMMIT) {
+ snprintf(msg, sizeof(msg),
+ "Brick %s "
+ "is not decommissioned. "
+ "Use start or force option",
+ brick);
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_BRICK_NOT_DECOM,
+ "Use 'start' or 'force' option, Brick=%s", brick, NULL);
+ *errstr = gf_strdup(msg);
+ ret = -1;
+ goto out;
}
if (glusterd_is_local_brick(THIS, volinfo, brickinfo)) {
switch (cmd) {
case GF_OP_CMD_START:
- case GF_OP_CMD_DETACH_START:
goto check;
case GF_OP_CMD_NONE:
default:
@@ -1872,8 +1742,6 @@ glusterd_remove_brick_validate_bricks(gf1_op_commands cmd, int32_t brick_count,
}
switch (cmd_defrag) {
- case GF_DEFRAG_CMD_DETACH_START:
- break;
case GF_DEFRAG_CMD_NONE:
default:
continue;
@@ -1885,6 +1753,10 @@ glusterd_remove_brick_validate_bricks(gf1_op_commands cmd, int32_t brick_count,
"brick %s. Use force option to "
"remove the offline brick",
brick);
+ gf_smsg(
+ this->name, GF_LOG_ERROR, errno, GD_MSG_BRICK_STOPPED,
+ "Use 'force' option to remove the offline brick, Brick=%s",
+ brick, NULL);
*errstr = gf_strdup(msg);
ret = -1;
goto out;
@@ -1895,6 +1767,8 @@ glusterd_remove_brick_validate_bricks(gf1_op_commands cmd, int32_t brick_count,
"Found dead "
"brick %s",
brick);
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_BRICK_DEAD,
+ "Brick=%s", brick, NULL);
*errstr = gf_strdup(msg);
ret = -1;
goto out;
@@ -1904,29 +1778,33 @@ glusterd_remove_brick_validate_bricks(gf1_op_commands cmd, int32_t brick_count,
continue;
}
- rcu_read_lock();
+ RCU_READ_LOCK;
peerinfo = glusterd_peerinfo_find_by_uuid(brickinfo->uuid);
if (!peerinfo) {
+ RCU_READ_UNLOCK;
snprintf(msg, sizeof(msg),
"Host node of the "
"brick %s is not in cluster",
brick);
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ GD_MSG_BRICK_HOST_NOT_FOUND, "Brick=%s", brick, NULL);
*errstr = gf_strdup(msg);
ret = -1;
- rcu_read_unlock();
goto out;
}
if (!peerinfo->connected) {
+ RCU_READ_UNLOCK;
snprintf(msg, sizeof(msg),
"Host node of the "
"brick %s is down",
brick);
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_BRICK_HOST_DOWN,
+ "Brick=%s", brick, NULL);
*errstr = gf_strdup(msg);
ret = -1;
- rcu_read_unlock();
goto out;
}
- rcu_read_unlock();
+ RCU_READ_UNLOCK;
}
out:
@@ -2001,6 +1879,7 @@ glusterd_op_stage_remove_brick(dict_t *dict, char **op_errstr)
errstr = gf_strdup(
"Deleting all the bricks of the "
"volume is not allowed");
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_BRICK_DELETE, NULL);
ret = -1;
goto out;
}
@@ -2009,24 +1888,13 @@ glusterd_op_stage_remove_brick(dict_t *dict, char **op_errstr)
switch (cmd) {
case GF_OP_CMD_NONE:
errstr = gf_strdup("no remove-brick command issued");
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_BRICK_NO_REMOVE_CMD,
+ NULL);
goto out;
case GF_OP_CMD_STATUS:
ret = 0;
goto out;
-
- case GF_OP_CMD_DETACH_START:
- if (volinfo->type != GF_CLUSTER_TYPE_TIER) {
- snprintf(msg, sizeof(msg),
- "volume %s is not a tier "
- "volume",
- volinfo->volname);
- errstr = gf_strdup(msg);
- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOL_NOT_TIER, "%s",
- errstr);
- goto out;
- }
-
case GF_OP_CMD_START: {
if ((volinfo->type == GF_CLUSTER_TYPE_REPLICATE) &&
dict_getn(dict, "replica-count", SLEN("replica-count"))) {
@@ -2041,21 +1909,12 @@ glusterd_op_stage_remove_brick(dict_t *dict, char **op_errstr)
}
if (GLUSTERD_STATUS_STARTED != volinfo->status) {
- if (volinfo->type == GF_CLUSTER_TYPE_TIER) {
- snprintf(msg, sizeof(msg),
- "Volume %s needs "
- "to be started before detach-tier "
- "(you can use 'force' or 'commit' "
- "to override this behavior)",
- volinfo->volname);
- } else {
- snprintf(msg, sizeof(msg),
- "Volume %s needs "
- "to be started before remove-brick "
- "(you can use 'force' or 'commit' "
- "to override this behavior)",
- volinfo->volname);
- }
+ snprintf(msg, sizeof(msg),
+ "Volume %s needs "
+ "to be started before remove-brick "
+ "(you can use 'force' or 'commit' "
+ "to override this behavior)",
+ volinfo->volname);
errstr = gf_strdup(msg);
gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOL_NOT_STARTED,
"%s", errstr);
@@ -2104,6 +1963,21 @@ glusterd_op_stage_remove_brick(dict_t *dict, char **op_errstr)
goto out;
}
+ if (volinfo->snap_count > 0 ||
+ !cds_list_empty(&volinfo->snap_volumes)) {
+ snprintf(msg, sizeof(msg),
+ "Volume %s has %" PRIu64
+ " snapshots. "
+ "Changing the volume configuration will not effect "
+ "snapshots."
+ "But the snapshot brick mount should be intact to "
+ "make them function.",
+ volname, volinfo->snap_count);
+ gf_msg("glusterd", GF_LOG_WARNING, 0, GD_MSG_SNAP_WARN, "%s",
+ msg);
+ msg[0] = '\0';
+ }
+
ret = glusterd_remove_brick_validate_bricks(
cmd, brick_count, dict, volinfo, &errstr, GF_DEFRAG_CMD_NONE);
if (ret)
@@ -2132,55 +2006,16 @@ glusterd_op_stage_remove_brick(dict_t *dict, char **op_errstr)
}
case GF_OP_CMD_STOP:
- case GF_OP_CMD_STOP_DETACH_TIER:
ret = 0;
break;
- case GF_OP_CMD_DETACH_COMMIT:
- if (volinfo->type != GF_CLUSTER_TYPE_TIER) {
- snprintf(msg, sizeof(msg),
- "volume %s is not a tier "
- "volume",
- volinfo->volname);
- errstr = gf_strdup(msg);
- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOL_NOT_TIER, "%s",
- errstr);
- goto out;
- }
- if (volinfo->decommission_in_progress) {
- errstr = gf_strdup(
- "use 'force' option as migration "
- "is in progress");
- goto out;
- }
- if (volinfo->rebal.defrag_status == GF_DEFRAG_STATUS_FAILED) {
- errstr = gf_strdup(
- "use 'force' option as migration "
- "has failed");
- goto out;
- }
-
- ret = glusterd_remove_brick_validate_bricks(
- cmd, brick_count, dict, volinfo, &errstr, GF_DEFRAG_CMD_NONE);
- if (ret)
- goto out;
-
- /* If geo-rep is configured, for this volume, it should be
- * stopped.
- */
- param.volinfo = volinfo;
- ret = glusterd_check_geo_rep_running(&param, op_errstr);
- if (ret || param.is_active) {
- ret = -1;
- goto out;
- }
- break;
-
case GF_OP_CMD_COMMIT:
if (volinfo->decommission_in_progress) {
errstr = gf_strdup(
"use 'force' option as migration "
"is in progress");
+ gf_smsg(this->name, GF_LOG_WARNING, 0, GD_MSG_MIGRATION_PROG,
+ "Use 'force' option", NULL);
goto out;
}
@@ -2188,9 +2023,27 @@ glusterd_op_stage_remove_brick(dict_t *dict, char **op_errstr)
errstr = gf_strdup(
"use 'force' option as migration "
"has failed");
+ gf_smsg(this->name, GF_LOG_WARNING, 0, GD_MSG_MIGRATION_FAIL,
+ "Use 'force' option", NULL);
goto out;
}
+ if (volinfo->rebal.defrag_status == GF_DEFRAG_STATUS_COMPLETE) {
+ if (volinfo->rebal.rebalance_failures > 0 ||
+ volinfo->rebal.skipped_files > 0) {
+ errstr = gf_strdup(
+ "use 'force' option as migration "
+ "of some files might have been skipped or "
+ "has failed");
+ gf_smsg(this->name, GF_LOG_WARNING, 0,
+ GD_MSG_MIGRATION_FAIL,
+ "Use 'force' option, some files might have been "
+ "skipped",
+ NULL);
+ goto out;
+ }
+ }
+
ret = glusterd_remove_brick_validate_bricks(
cmd, brick_count, dict, volinfo, &errstr, GF_DEFRAG_CMD_NONE);
if (ret)
@@ -2208,18 +2061,11 @@ glusterd_op_stage_remove_brick(dict_t *dict, char **op_errstr)
break;
- case GF_OP_CMD_DETACH_COMMIT_FORCE:
- if (volinfo->type != GF_CLUSTER_TYPE_TIER) {
- snprintf(msg, sizeof(msg),
- "volume %s is not a tier "
- "volume",
- volinfo->volname);
- errstr = gf_strdup(msg);
- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOL_NOT_TIER, "%s",
- errstr);
- goto out;
- }
case GF_OP_CMD_COMMIT_FORCE:
+ case GF_OP_CMD_DETACH_START:
+ case GF_OP_CMD_DETACH_COMMIT:
+ case GF_OP_CMD_DETACH_COMMIT_FORCE:
+ case GF_OP_CMD_STOP_DETACH_TIER:
break;
}
ret = 0;
@@ -2230,7 +2076,8 @@ out:
if (op_errstr)
*op_errstr = errstr;
}
-
+ if (!op_errstr && errstr)
+ GF_FREE(errstr);
return ret;
}
@@ -2312,48 +2159,6 @@ glusterd_remove_brick_migrate_cbk(glusterd_volinfo_t *volinfo,
return ret;
}
-static int
-glusterd_op_perform_attach_tier(dict_t *dict, glusterd_volinfo_t *volinfo,
- int count, char *bricks)
-{
- int ret = 0;
- int replica_count = 0;
- int type = 0;
-
- /*
- * Store the new (cold) tier's structure until the graph is generated.
- * If there is a failure before the graph is generated the
- * structure will revert to its original state.
- */
- volinfo->tier_info.cold_dist_leaf_count = volinfo->dist_leaf_count;
- volinfo->tier_info.cold_type = volinfo->type;
- volinfo->tier_info.cold_brick_count = volinfo->brick_count;
- volinfo->tier_info.cold_replica_count = volinfo->replica_count;
- volinfo->tier_info.cold_disperse_count = volinfo->disperse_count;
- volinfo->tier_info.cold_redundancy_count = volinfo->redundancy_count;
-
- ret = dict_get_int32n(dict, "replica-count", SLEN("replica-count"),
- &replica_count);
- if (!ret)
- volinfo->tier_info.hot_replica_count = replica_count;
- else
- volinfo->tier_info.hot_replica_count = 1;
- volinfo->tier_info.hot_brick_count = count;
- ret = dict_get_int32n(dict, "hot-type", SLEN("hot-type"), &type);
- volinfo->tier_info.hot_type = type;
- ret = dict_set_int32n(dict, "type", SLEN("type"), GF_CLUSTER_TYPE_TIER);
-
- if (!ret)
- ret = dict_set_nstrn(volinfo->dict, "features.ctr-enabled",
- SLEN("features.ctr-enabled"), "on", SLEN("on"));
-
- if (!ret)
- ret = dict_set_nstrn(volinfo->dict, "cluster.tier-mode",
- SLEN("cluster.tier-mode"), "cache", SLEN("cache"));
-
- return ret;
-}
-
int
glusterd_op_add_brick(dict_t *dict, char **op_errstr)
{
@@ -2401,11 +2206,6 @@ glusterd_op_add_brick(dict_t *dict, char **op_errstr)
goto out;
}
- if (dict_getn(dict, "attach-tier", SLEN("attach-tier"))) {
- gf_msg_debug(THIS->name, 0, "Adding tier");
- glusterd_op_perform_attach_tier(dict, volinfo, count, bricks);
- }
-
ret = glusterd_op_perform_add_bricks(volinfo, count, bricks, dict);
if (ret) {
gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_BRICK_ADD_FAIL,
@@ -2434,94 +2234,118 @@ out:
}
int
-glusterd_op_add_tier_brick(dict_t *dict, char **op_errstr)
+glusterd_post_commit_add_brick(dict_t *dict, char **op_errstr)
{
int ret = 0;
char *volname = NULL;
- glusterd_conf_t *priv = NULL;
- glusterd_volinfo_t *volinfo = NULL;
- xlator_t *this = NULL;
- char *bricks = NULL;
- int32_t count = 0;
- this = THIS;
- GF_VALIDATE_OR_GOTO("glusterd", this, out);
+ ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname);
- priv = this->private;
- GF_VALIDATE_OR_GOTO(this->name, priv, out);
+ if (ret) {
+ gf_msg(THIS->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
+ "Unable to get volume name");
+ goto out;
+ }
+ ret = glusterd_replace_old_auth_allow_list(volname);
+out:
+ return ret;
+}
+
+int
+glusterd_post_commit_replace_brick(dict_t *dict, char **op_errstr)
+{
+ int ret = 0;
+ char *volname = NULL;
ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname);
if (ret) {
- gf_msg("glusterd", GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
+ gf_msg(THIS->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
"Unable to get volume name");
goto out;
}
+ ret = glusterd_replace_old_auth_allow_list(volname);
+out:
+ return ret;
+}
- ret = glusterd_volinfo_find(volname, &volinfo);
+int
+glusterd_set_rebalance_id_for_remove_brick(dict_t *req_dict, dict_t *rsp_dict)
+{
+ int ret = -1;
+ char *volname = NULL;
+ glusterd_volinfo_t *volinfo = NULL;
+ char msg[2048] = {0};
+ char *task_id_str = NULL;
+ xlator_t *this = NULL;
+ int32_t cmd = 0;
+
+ this = THIS;
+ GF_ASSERT(this);
+
+ GF_ASSERT(rsp_dict);
+ GF_ASSERT(req_dict);
+ ret = dict_get_strn(rsp_dict, "volname", SLEN("volname"), &volname);
if (ret) {
- gf_msg("glusterd", GF_LOG_ERROR, EINVAL, GD_MSG_VOL_NOT_FOUND,
- "Volume not found");
+ gf_msg_debug(this->name, 0, "volname not found");
goto out;
}
- ret = dict_get_int32n(dict, "count", SLEN("count"), &count);
+ ret = glusterd_volinfo_find(volname, &volinfo);
if (ret) {
- gf_msg("glusterd", GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
- "Unable to get count");
+ gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_VOL_NOT_FOUND,
+ "Unable to allocate memory");
goto out;
}
- ret = dict_get_strn(dict, "bricks", SLEN("bricks"), &bricks);
+ ret = dict_get_int32n(rsp_dict, "command", SLEN("command"), &cmd);
if (ret) {
- gf_msg("glusterd", GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
- "Unable to get bricks");
+ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
+ "Unable to get command");
goto out;
}
- if (dict_getn(dict, "attach-tier", SLEN("attach-tier"))) {
- gf_msg_debug(THIS->name, 0, "Adding tier");
- glusterd_op_perform_attach_tier(dict, volinfo, count, bricks);
- }
+ /* remove brick task id is generated in glusterd_op_stage_remove_brick(),
+ * but rsp_dict is unavailable there. So copying it to rsp_dict from
+ * req_dict here. */
- ret = glusterd_op_perform_add_bricks(volinfo, count, bricks, dict);
- if (ret) {
- gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_BRICK_ADD_FAIL,
- "Unable to add bricks");
- goto out;
+ if (is_origin_glusterd(rsp_dict)) {
+ ret = dict_get_strn(req_dict, GF_REMOVE_BRICK_TID_KEY,
+ SLEN(GF_REMOVE_BRICK_TID_KEY), &task_id_str);
+ if (ret) {
+ snprintf(msg, sizeof(msg), "Missing rebalance id for remove-brick");
+ gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_REBALANCE_ID_MISSING,
+ "%s", msg);
+ ret = 0;
+ } else {
+ gf_uuid_parse(task_id_str, volinfo->rebal.rebalance_id);
+
+ ret = glusterd_copy_uuid_to_dict(volinfo->rebal.rebalance_id,
+ rsp_dict, GF_REMOVE_BRICK_TID_KEY,
+ SLEN(GF_REMOVE_BRICK_TID_KEY));
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0,
+ GD_MSG_REMOVE_BRICK_ID_SET_FAIL,
+ "Failed to set remove-brick-id");
+ goto out;
+ }
+ }
}
- if (priv->op_version <= GD_OP_VERSION_3_10_0) {
- ret = glusterd_store_volinfo(volinfo,
- GLUSTERD_VOLINFO_VER_AC_INCREMENT);
- if (ret)
+ if (!gf_uuid_is_null(volinfo->rebal.rebalance_id) &&
+ GD_OP_REMOVE_BRICK == volinfo->rebal.op) {
+ ret = glusterd_copy_uuid_to_dict(volinfo->rebal.rebalance_id, rsp_dict,
+ GF_REMOVE_BRICK_TID_KEY,
+ SLEN(GF_REMOVE_BRICK_TID_KEY));
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Failed to set task-id for volume %s", volname);
goto out;
- } else {
- /*
- * The cluster is operating at version greater than
- * gluster-3.10.0. So no need to store volfiles
- * in commit phase, the same will be done
- * in post validate phase with v3 framework.
- */
+ }
}
-
- if (GLUSTERD_STATUS_STARTED == volinfo->status)
- ret = glusterd_svcs_manager(volinfo);
-
out:
return ret;
}
-
-void
-glusterd_op_perform_detach_tier(glusterd_volinfo_t *volinfo)
-{
- volinfo->type = volinfo->tier_info.cold_type;
- volinfo->replica_count = volinfo->tier_info.cold_replica_count;
- volinfo->disperse_count = volinfo->tier_info.cold_disperse_count;
- volinfo->redundancy_count = volinfo->tier_info.cold_redundancy_count;
- volinfo->dist_leaf_count = volinfo->tier_info.cold_dist_leaf_count;
-}
-
int
glusterd_op_remove_brick(dict_t *dict, char **op_errstr)
{
@@ -2538,8 +2362,6 @@ glusterd_op_remove_brick(dict_t *dict, char **op_errstr)
int force = 0;
gf1_op_commands cmd = 0;
int32_t replica_count = 0;
- glusterd_brickinfo_t *brickinfo = NULL;
- glusterd_brickinfo_t *tmp = NULL;
char *task_id_str = NULL;
xlator_t *this = NULL;
dict_t *bricks_dict = NULL;
@@ -2547,11 +2369,6 @@ glusterd_op_remove_brick(dict_t *dict, char **op_errstr)
int start_remove = 0;
uint32_t commit_hash = 0;
int defrag_cmd = 0;
- int detach_commit = 0;
- void *tier_info = NULL;
- char *cold_shd_key = NULL;
- char *hot_shd_key = NULL;
- int delete_key = 1;
glusterd_conf_t *conf = NULL;
this = THIS;
@@ -2582,7 +2399,7 @@ glusterd_op_remove_brick(dict_t *dict, char **op_errstr)
}
cmd = flag;
- if ((GF_OP_CMD_START == cmd) || (GF_OP_CMD_DETACH_START == cmd))
+ if (GF_OP_CMD_START == cmd)
start_remove = 1;
/* Set task-id, if available, in ctx dict for operations other than
@@ -2622,35 +2439,6 @@ glusterd_op_remove_brick(dict_t *dict, char **op_errstr)
goto out;
case GF_OP_CMD_STOP:
- case GF_OP_CMD_STOP_DETACH_TIER: {
- /* Fall back to the old volume file */
- cds_list_for_each_entry_safe(brickinfo, tmp, &volinfo->bricks,
- brick_list)
- {
- if (!brickinfo->decommissioned)
- continue;
- brickinfo->decommissioned = 0;
- }
- ret = glusterd_create_volfiles_and_notify_services(volinfo);
- if (ret) {
- gf_msg(this->name, GF_LOG_WARNING, 0,
- GD_MSG_VOLFILE_CREATE_FAIL, "failed to create volfiles");
- goto out;
- }
-
- ret = glusterd_store_volinfo(volinfo,
- GLUSTERD_VOLINFO_VER_AC_INCREMENT);
- if (ret) {
- gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_VOLINFO_SET_FAIL,
- "failed to store volinfo");
- goto out;
- }
-
- ret = 0;
- goto out;
- }
-
- case GF_OP_CMD_DETACH_START:
case GF_OP_CMD_START:
/* Reset defrag status to 'NOT STARTED' whenever a
* remove-brick/rebalance command is issued to remove
@@ -2658,6 +2446,7 @@ glusterd_op_remove_brick(dict_t *dict, char **op_errstr)
* Update defrag_cmd as well or it will only be done
* for nodes on which the brick to be removed exists.
*/
+ /* coverity[MIXED_ENUMS] */
volinfo->rebal.defrag_cmd = cmd;
volinfo->rebal.defrag_status = GF_DEFRAG_STATUS_NOT_STARTED;
ret = dict_get_strn(dict, GF_REMOVE_BRICK_TID_KEY,
@@ -2676,43 +2465,6 @@ glusterd_op_remove_brick(dict_t *dict, char **op_errstr)
force = 1;
break;
- case GF_OP_CMD_DETACH_COMMIT:
- case GF_OP_CMD_DETACH_COMMIT_FORCE:
- glusterd_op_perform_detach_tier(volinfo);
- detach_commit = 1;
-
- /* Disabling ctr when detaching a tier, since
- * currently tier is the only consumer of ctr.
- * Revisit this code when this constraint no
- * longer exist.
- */
- dict_deln(volinfo->dict, "features.ctr-enabled",
- SLEN("features.ctr-enabled"));
- dict_deln(volinfo->dict, "cluster.tier-mode",
- SLEN("cluster.tier-mode"));
-
- hot_shd_key = gd_get_shd_key(volinfo->tier_info.hot_type);
- cold_shd_key = gd_get_shd_key(volinfo->tier_info.cold_type);
- if (hot_shd_key) {
- /*
- * Since post detach, shd graph will not contain hot
- * tier. So we need to clear option set for hot tier.
- * For a tiered volume there can be different key
- * for both hot and cold. If hot tier is shd compatible
- * then we need to remove the configured value when
- * detaching a tier, only if the key's are different or
- * cold key is NULL. So we will set delete_key first,
- * and if cold key is not null and they are equal then
- * we will clear the flag. Otherwise we will delete the
- * key.
- */
- if (cold_shd_key)
- delete_key = strcmp(hot_shd_key, cold_shd_key);
- if (delete_key)
- dict_del(volinfo->dict, hot_shd_key);
- }
- /* fall through */
-
case GF_OP_CMD_COMMIT_FORCE:
if (volinfo->decommission_in_progress) {
@@ -2732,6 +2484,11 @@ glusterd_op_remove_brick(dict_t *dict, char **op_errstr)
ret = 0;
force = 1;
break;
+ case GF_OP_CMD_DETACH_START:
+ case GF_OP_CMD_DETACH_COMMIT_FORCE:
+ case GF_OP_CMD_DETACH_COMMIT:
+ case GF_OP_CMD_STOP_DETACH_TIER:
+ break;
}
ret = dict_get_int32n(dict, "count", SLEN("count"), &count);
@@ -2740,10 +2497,6 @@ glusterd_op_remove_brick(dict_t *dict, char **op_errstr)
"Unable to get count");
goto out;
}
-
- if (volinfo->type == GF_CLUSTER_TYPE_TIER)
- count = glusterd_set_detach_bricks(dict, volinfo);
-
/* Save the list of bricks for later usage only on starting a
* remove-brick. Right now this is required for displaying the task
* parameters with task status in volume status.
@@ -2796,12 +2549,6 @@ glusterd_op_remove_brick(dict_t *dict, char **op_errstr)
i++;
}
- if (detach_commit) {
- /* Clear related information from volinfo */
- tier_info = ((void *)(&volinfo->tier_info));
- memset(tier_info, 0, sizeof(volinfo->tier_info));
- }
-
if (start_remove)
volinfo->rebal.dict = dict_ref(bricks_dict);
@@ -2824,8 +2571,7 @@ glusterd_op_remove_brick(dict_t *dict, char **op_errstr)
* volumes undergoing a detach operation, they should not
* be modified here.
*/
- if ((replica_count == 1) && (cmd != GF_OP_CMD_DETACH_COMMIT) &&
- (cmd != GF_OP_CMD_DETACH_COMMIT_FORCE)) {
+ if (replica_count == 1) {
if (volinfo->type == GF_CLUSTER_TYPE_REPLICATE) {
volinfo->type = GF_CLUSTER_TYPE_NONE;
/* backward compatibility */
@@ -2863,7 +2609,7 @@ glusterd_op_remove_brick(dict_t *dict, char **op_errstr)
}
if (start_remove && volinfo->status == GLUSTERD_STATUS_STARTED) {
- ret = glusterd_svcs_reconfigure();
+ ret = glusterd_svcs_reconfigure(volinfo);
if (ret) {
gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_NFS_RECONF_FAIL,
"Unable to reconfigure NFS-Server");
@@ -2886,8 +2632,6 @@ glusterd_op_remove_brick(dict_t *dict, char **op_errstr)
}
/* perform the rebalance operations */
defrag_cmd = GF_DEFRAG_CMD_START_FORCE;
- if (cmd == GF_OP_CMD_DETACH_START)
- defrag_cmd = GF_DEFRAG_CMD_START_DETACH_TIER;
/*
* We need to set this *before* we issue commands to the
* bricks, or else we might end up setting it after the bricks
@@ -2916,7 +2660,7 @@ out:
GF_FREE(brick_tmpstr);
if (bricks_dict)
dict_unref(bricks_dict);
-
+ gf_msg_debug(this->name, 0, "returning %d ", ret);
return ret;
}
@@ -3034,202 +2778,19 @@ out:
}
int
-__glusterd_handle_add_tier_brick(rpcsvc_request_t *req)
-{
- int32_t ret = -1;
- gf_cli_req cli_req = {{
- 0,
- }};
- dict_t *dict = NULL;
- char *bricks = NULL;
- char *volname = NULL;
- int brick_count = 0;
- void *cli_rsp = NULL;
- char err_str[2048] = "";
- gf_cli_rsp rsp = {
- 0,
- };
- glusterd_volinfo_t *volinfo = NULL;
- xlator_t *this = NULL;
- int32_t replica_count = 0;
- int32_t arbiter_count = 0;
- int type = 0;
-
- this = THIS;
- GF_VALIDATE_OR_GOTO("glusterd", this, out);
-
- GF_VALIDATE_OR_GOTO(this->name, req, out);
-
- ret = xdr_to_generic(req->msg[0], &cli_req, (xdrproc_t)xdr_gf_cli_req);
- if (ret < 0) {
- /*failed to decode msg*/
- req->rpc_err = GARBAGE_ARGS;
- snprintf(err_str, sizeof(err_str), "Garbage args received");
- gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_GARBAGE_ARGS, "%s",
- err_str);
- goto out;
- }
-
- gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_ADD_BRICK_REQ_RECVD,
- "Received add brick req");
-
- if (cli_req.dict.dict_len) {
- /* Unserialize the dictionary */
- dict = dict_new();
-
- ret = dict_unserialize(cli_req.dict.dict_val, cli_req.dict.dict_len,
- &dict);
- if (ret < 0) {
- gf_msg(this->name, GF_LOG_ERROR, errno,
- GD_MSG_DICT_UNSERIALIZE_FAIL,
- "failed to "
- "unserialize req-buffer to dictionary");
- snprintf(err_str, sizeof(err_str),
- "Unable to decode "
- "the command");
- goto out;
- }
- }
-
- ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname);
-
- if (ret) {
- snprintf(err_str, sizeof(err_str),
- "Unable to get volume "
- "name");
- gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, "%s",
- err_str);
- goto out;
- }
-
- if (!glusterd_check_volume_exists(volname)) {
- snprintf(err_str, sizeof(err_str), "Volume %s does not exist", volname);
- gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_VOL_NOT_FOUND, "%s",
- err_str);
- ret = -1;
- goto out;
- }
-
- ret = dict_get_int32n(dict, "count", SLEN("count"), &brick_count);
- if (ret) {
- snprintf(err_str, sizeof(err_str),
- "Unable to get volume "
- "brick count");
- gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, "%s",
- err_str);
- goto out;
- }
-
- ret = dict_get_int32n(dict, "replica-count", SLEN("replica-count"),
- &replica_count);
- if (!ret) {
- gf_msg(this->name, GF_LOG_INFO, errno, GD_MSG_DICT_GET_SUCCESS,
- "replica-count is %d", replica_count);
- }
-
- ret = dict_get_int32n(dict, "arbiter-count", SLEN("arbiter-count"),
- &arbiter_count);
- if (!ret) {
- gf_msg(this->name, GF_LOG_INFO, errno, GD_MSG_DICT_GET_SUCCESS,
- "arbiter-count is %d", arbiter_count);
- }
-
- if (!dict_getn(dict, "force", SLEN("force"))) {
- gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
- "Failed to get flag");
- ret = -1;
- goto out;
- }
-
- ret = glusterd_volinfo_find(volname, &volinfo);
- if (ret) {
- snprintf(err_str, sizeof(err_str),
- "Unable to get volinfo "
- "for volume name %s",
- volname);
- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLINFO_GET_FAIL, "%s",
- err_str);
- goto out;
- }
-
- if (glusterd_is_tiering_supported(err_str) == _gf_false) {
- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VERSION_UNSUPPORTED,
- "Tiering not supported at this version");
- ret = -1;
- goto out;
- }
-
- if (dict_getn(dict, "attach-tier", SLEN("attach-tier"))) {
- if (volinfo->type == GF_CLUSTER_TYPE_TIER) {
- snprintf(err_str, sizeof(err_str), "Volume %s is already a tier.",
- volname);
- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOL_ALREADY_TIER, "%s",
- err_str);
- ret = -1;
- goto out;
- }
-
- ret = dict_get_int32n(dict, "hot-type", SLEN("hot-type"), &type);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
- "failed to get type from dictionary");
- goto out;
- }
- }
-
- ret = dict_get_strn(dict, "bricks", SLEN("bricks"), &bricks);
- if (ret) {
- snprintf(err_str, sizeof(err_str),
- "Unable to get volume "
- "bricks");
- gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, "%s",
- err_str);
- goto out;
- }
-
- if (type != volinfo->type) {
- ret = dict_set_int32n(dict, "type", SLEN("type"), type);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
- "failed to set the new type in dict");
- goto out;
- }
- }
-
- ret = glusterd_mgmt_v3_initiate_all_phases(req, GD_OP_ADD_TIER_BRICK, dict);
-
-out:
- if (ret) {
- rsp.op_ret = -1;
- rsp.op_errno = 0;
- if (err_str[0] == '\0')
- snprintf(err_str, sizeof(err_str), "Operation failed");
- rsp.op_errstr = err_str;
- cli_rsp = &rsp;
- glusterd_to_cli(req, cli_rsp, NULL, 0, NULL, (xdrproc_t)xdr_gf_cli_rsp,
- dict);
- ret = 0; /*sent error to cli, prevent second reply*/
- }
-
- free(cli_req.dict.dict_val); /*its malloced by xdr*/
-
- return ret;
-}
-
-int
glusterd_handle_add_tier_brick(rpcsvc_request_t *req)
{
- return glusterd_big_locked_handler(req, __glusterd_handle_add_tier_brick);
+ return 0;
}
int
glusterd_handle_attach_tier(rpcsvc_request_t *req)
{
- return glusterd_big_locked_handler(req, __glusterd_handle_add_brick);
+ return 0;
}
int
glusterd_handle_detach_tier(rpcsvc_request_t *req)
{
- return glusterd_big_locked_handler(req, __glusterd_handle_remove_brick);
+ return 0;
}
diff --git a/xlators/mgmt/glusterd/src/glusterd-conn-mgmt.c b/xlators/mgmt/glusterd/src/glusterd-conn-mgmt.c
index f84a7617549..5c01f0c70b6 100644
--- a/xlators/mgmt/glusterd/src/glusterd-conn-mgmt.c
+++ b/xlators/mgmt/glusterd/src/glusterd-conn-mgmt.c
@@ -8,7 +8,7 @@
cases as published by the Free Software Foundation.
*/
-#include "xlator.h"
+#include <glusterfs/xlator.h>
#include "rpc-clnt.h"
#include "glusterd.h"
#include "glusterd-conn-mgmt.h"
@@ -26,8 +26,17 @@ glusterd_conn_init(glusterd_conn_t *conn, char *sockpath, int frame_timeout,
xlator_t *this = THIS;
glusterd_svc_t *svc = NULL;
- if (!this)
+ if (!this) {
+ gf_smsg(THIS->name, GF_LOG_ERROR, errno, GD_MSG_XLATOR_NOT_DEFINED,
+ NULL);
goto out;
+ }
+
+ options = dict_new();
+ if (!options) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL);
+ goto out;
+ }
svc = glusterd_conn_get_svc_object(conn);
if (!svc) {
@@ -36,14 +45,17 @@ glusterd_conn_init(glusterd_conn_t *conn, char *sockpath, int frame_timeout,
goto out;
}
- ret = rpc_transport_unix_options_build(&options, sockpath, frame_timeout);
+ ret = rpc_transport_unix_options_build(options, sockpath, frame_timeout);
if (ret)
goto out;
ret = dict_set_int32n(options, "transport.socket.ignore-enoent",
SLEN("transport.socket.ignore-enoent"), 1);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=transport.socket.ignore-enoent", NULL);
goto out;
+ }
/* @options is free'd by rpc_transport when destroyed */
rpc = rpc_clnt_new(options, this, (char *)svc->name, 16);
@@ -57,15 +69,18 @@ glusterd_conn_init(glusterd_conn_t *conn, char *sockpath, int frame_timeout,
goto out;
ret = snprintf(conn->sockpath, sizeof(conn->sockpath), "%s", sockpath);
- if (ret < 0)
+ if (ret < 0) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_COPY_FAIL, NULL);
goto out;
- else
+ } else
ret = 0;
conn->frame_timeout = frame_timeout;
conn->rpc = rpc;
conn->notify = notify;
out:
+ if (options)
+ dict_unref(options);
if (ret) {
if (rpc) {
rpc_clnt_unref(rpc);
@@ -91,7 +106,7 @@ glusterd_conn_connect(glusterd_conn_t *conn)
int
glusterd_conn_disconnect(glusterd_conn_t *conn)
{
- rpc_clnt_disconnect(conn->rpc);
+ rpc_clnt_disable(conn->rpc);
return 0;
}
@@ -132,3 +147,45 @@ glusterd_conn_build_socket_filepath(char *rundir, uuid_t uuid, char *socketpath,
glusterd_set_socket_filepath(sockfilepath, socketpath, len);
return 0;
}
+
+int
+__glusterd_muxsvc_conn_common_notify(struct rpc_clnt *rpc, void *mydata,
+ rpc_clnt_event_t event, void *data)
+{
+ glusterd_conf_t *conf = THIS->private;
+ glusterd_svc_proc_t *mux_proc = mydata;
+ int ret = -1;
+
+ /* Silently ignoring this error, exactly like the current
+ * implementation */
+ if (!mux_proc)
+ return 0;
+
+ if (event == RPC_CLNT_DESTROY) {
+ /* RPC_CLNT_DESTROY will only be called after mux_proc is detached from the
+ * list. So it is safe to call without lock. Processing
+ * RPC_CLNT_DESTROY under a lock will lead to deadlock.
+ */
+ if (mux_proc->data) {
+ glusterd_volinfo_unref(mux_proc->data);
+ mux_proc->data = NULL;
+ }
+ GF_FREE(mux_proc);
+ ret = 0;
+ } else {
+ pthread_mutex_lock(&conf->attach_lock);
+ {
+ ret = mux_proc->notify(mux_proc, event);
+ }
+ pthread_mutex_unlock(&conf->attach_lock);
+ }
+ return ret;
+}
+
+int
+glusterd_muxsvc_conn_common_notify(struct rpc_clnt *rpc, void *mydata,
+ rpc_clnt_event_t event, void *data)
+{
+ return glusterd_big_locked_notify(rpc, mydata, event, data,
+ __glusterd_muxsvc_conn_common_notify);
+}
diff --git a/xlators/mgmt/glusterd/src/glusterd-conn-mgmt.h b/xlators/mgmt/glusterd/src/glusterd-conn-mgmt.h
index 602c0ba7b84..1b225621ab1 100644
--- a/xlators/mgmt/glusterd/src/glusterd-conn-mgmt.h
+++ b/xlators/mgmt/glusterd/src/glusterd-conn-mgmt.h
@@ -20,11 +20,11 @@ typedef int (*glusterd_conn_notify_t)(glusterd_conn_t *conn,
struct glusterd_conn_ {
struct rpc_clnt *rpc;
- char sockpath[PATH_MAX];
- int frame_timeout;
/* Existing daemons tend to specialize their respective
* notify implementations, so ... */
glusterd_conn_notify_t notify;
+ int frame_timeout;
+ char sockpath[PATH_MAX];
};
int
@@ -43,9 +43,11 @@ glusterd_conn_disconnect(glusterd_conn_t *conn);
int
glusterd_conn_common_notify(struct rpc_clnt *rpc, void *mydata,
rpc_clnt_event_t event, void *data);
+int
+glusterd_muxsvc_conn_common_notify(struct rpc_clnt *rpc, void *mydata,
+ rpc_clnt_event_t event, void *data);
int32_t
glusterd_conn_build_socket_filepath(char *rundir, uuid_t uuid, char *socketpath,
int len);
-
#endif
diff --git a/xlators/mgmt/glusterd/src/glusterd-errno.h b/xlators/mgmt/glusterd/src/glusterd-errno.h
index 7e1575b57af..c74070e0e8d 100644
--- a/xlators/mgmt/glusterd/src/glusterd-errno.h
+++ b/xlators/mgmt/glusterd/src/glusterd-errno.h
@@ -27,7 +27,7 @@ enum glusterd_op_errno {
EG_ISSNAP = 30813, /* Volume is a snap volume */
EG_GEOREPRUN = 30814, /* Geo-Replication is running */
EG_NOTTHINP = 30815, /* Bricks are not thinly provisioned */
- EG_NOGANESHA = 30816, /* obsolete ganesha is not enabled */
+ EG_NOGANESHA = 30816, /* Global ganesha is not enabled */
};
#endif
diff --git a/xlators/mgmt/glusterd/src/glusterd-ganesha.c b/xlators/mgmt/glusterd/src/glusterd-ganesha.c
new file mode 100644
index 00000000000..f08bd6cebee
--- /dev/null
+++ b/xlators/mgmt/glusterd/src/glusterd-ganesha.c
@@ -0,0 +1,927 @@
+/*
+ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include <glusterfs/common-utils.h>
+#include "glusterd.h"
+#include "glusterd-op-sm.h"
+#include "glusterd-store.h"
+#include "glusterd-utils.h"
+#include "glusterd-volgen.h"
+#include "glusterd-messages.h"
+#include <glusterfs/syscall.h>
+
+#include <ctype.h>
+
+int
+start_ganesha(char **op_errstr);
+
+typedef struct service_command {
+ char *binary;
+ char *service;
+ int (*action)(struct service_command *, char *);
+} service_command;
+
+/* parsing_ganesha_ha_conf will allocate the returned string
+ * to be freed (GF_FREE) by the caller
+ * return NULL if error or not found */
+static char *
+parsing_ganesha_ha_conf(const char *key)
+{
+#define MAX_LINE 1024
+ char scratch[MAX_LINE * 2] = {
+ 0,
+ };
+ char *value = NULL, *pointer = NULL, *end_pointer = NULL;
+ FILE *fp;
+
+ fp = fopen(GANESHA_HA_CONF, "r");
+ if (fp == NULL) {
+ gf_msg(THIS->name, GF_LOG_ERROR, errno, GD_MSG_FILE_OP_FAILED,
+ "couldn't open the file %s", GANESHA_HA_CONF);
+ goto end_ret;
+ }
+ while ((pointer = fgets(scratch, MAX_LINE, fp)) != NULL) {
+ /* Read config file until we get matching "^[[:space:]]*key" */
+ if (*pointer == '#') {
+ continue;
+ }
+ while (isblank(*pointer)) {
+ pointer++;
+ }
+ if (strncmp(pointer, key, strlen(key))) {
+ continue;
+ }
+ pointer += strlen(key);
+ /* key found : if we fail to parse, we'll return an error
+ * rather than trying next one
+ * - supposition : conf file is bash compatible : no space
+ * around the '=' */
+ if (*pointer != '=') {
+ gf_msg(THIS->name, GF_LOG_ERROR, errno,
+ GD_MSG_GET_CONFIG_INFO_FAILED, "Parsing %s failed at key %s",
+ GANESHA_HA_CONF, key);
+ goto end_close;
+ }
+ pointer++; /* jump the '=' */
+
+ if (*pointer == '"' || *pointer == '\'') {
+ /* don't include the quote */
+ pointer++;
+ }
+ end_pointer = pointer;
+ /* stop at the next closing quote or blank/newline */
+ do {
+ end_pointer++;
+ } while (!(*end_pointer == '\'' || *end_pointer == '"' ||
+ isspace(*end_pointer) || *end_pointer == '\0'));
+ *end_pointer = '\0';
+
+ /* got it. copy it and return */
+ value = gf_strdup(pointer);
+ break;
+ }
+
+end_close:
+ fclose(fp);
+end_ret:
+ return value;
+}
+
+static int
+sc_systemctl_action(struct service_command *sc, char *command)
+{
+ runner_t runner = {
+ 0,
+ };
+
+ runinit(&runner);
+ runner_add_args(&runner, sc->binary, command, sc->service, NULL);
+ return runner_run(&runner);
+}
+
+static int
+sc_service_action(struct service_command *sc, char *command)
+{
+ runner_t runner = {
+ 0,
+ };
+
+ runinit(&runner);
+ runner_add_args(&runner, sc->binary, sc->service, command, NULL);
+ return runner_run(&runner);
+}
+
+static int
+manage_service(char *action)
+{
+ int i = 0;
+ int ret = 0;
+ struct service_command sc_list[] = {{.binary = "/bin/systemctl",
+ .service = "nfs-ganesha",
+ .action = sc_systemctl_action},
+ {.binary = "/sbin/invoke-rc.d",
+ .service = "nfs-ganesha",
+ .action = sc_service_action},
+ {.binary = "/sbin/service",
+ .service = "nfs-ganesha",
+ .action = sc_service_action},
+ {.binary = NULL}};
+
+ while (sc_list[i].binary != NULL) {
+ ret = sys_access(sc_list[i].binary, X_OK);
+ if (ret == 0) {
+ gf_msg_debug(THIS->name, 0, "%s found.", sc_list[i].binary);
+ return sc_list[i].action(&sc_list[i], action);
+ }
+ i++;
+ }
+ gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_UNRECOGNIZED_SVC_MNGR,
+ "Could not %s NFS-Ganesha.Service manager for distro"
+ " not recognized.",
+ action);
+ return ret;
+}
+
+/*
+ * Check if the cluster is a ganesha cluster or not
+ */
+gf_boolean_t
+glusterd_is_ganesha_cluster()
+{
+ int ret = -1;
+ glusterd_conf_t *priv = NULL;
+ xlator_t *this = NULL;
+ gf_boolean_t ret_bool = _gf_false;
+
+ this = THIS;
+ GF_VALIDATE_OR_GOTO("ganesha", this, out);
+ priv = this->private;
+ GF_VALIDATE_OR_GOTO(this->name, priv, out);
+
+ ret = dict_get_str_boolean(priv->opts, GLUSTERD_STORE_KEY_GANESHA_GLOBAL,
+ _gf_false);
+ if (ret == _gf_true) {
+ ret_bool = _gf_true;
+ gf_msg_debug(this->name, 0, "nfs-ganesha is enabled for the cluster");
+ } else
+ gf_msg_debug(this->name, 0, "nfs-ganesha is disabled for the cluster");
+
+out:
+ return ret_bool;
+}
+
+/* Check if ganesha.enable is set to 'on', which indicates that
+ * a particular volume is exported via NFS-Ganesha */
+gf_boolean_t
+glusterd_check_ganesha_export(glusterd_volinfo_t *volinfo)
+{
+ char *value = NULL;
+ gf_boolean_t is_exported = _gf_false;
+ int ret = 0;
+
+ ret = glusterd_volinfo_get(volinfo, "ganesha.enable", &value);
+ if ((ret == 0) && value) {
+ if (strcmp(value, "on") == 0) {
+ gf_msg_debug(THIS->name, 0,
+ "ganesha.enable set"
+ " to %s",
+ value);
+ is_exported = _gf_true;
+ }
+ }
+ return is_exported;
+}
+
+/* *
+ * The below function is called as part of commit phase for volume set option
+ * "ganesha.enable". If the value is "on", it creates export configuration file
+ * and then exports the volume via a dbus command. In case of "off", the volume
+ * will already have been unexported during the stage phase, so it will remove
+ * the conf file from shared storage.
+ */
+int
+glusterd_check_ganesha_cmd(char *key, char *value, char **errstr, dict_t *dict)
+{
+ int ret = 0;
+ char *volname = NULL;
+
+ GF_ASSERT(key);
+ GF_ASSERT(value);
+ GF_ASSERT(dict);
+
+ if ((strcmp(key, "ganesha.enable") == 0)) {
+ if ((strcmp(value, "on")) && (strcmp(value, "off"))) {
+ gf_asprintf(errstr,
+ "Invalid value"
+ " for volume set command. Use on/off only.");
+ ret = -1;
+ goto out;
+ }
+ if (strcmp(value, "on") == 0) {
+ ret = glusterd_handle_ganesha_op(dict, errstr, key, value);
+
+ } else if (is_origin_glusterd(dict)) {
+ ret = dict_get_str(dict, "volname", &volname);
+ if (ret) {
+ gf_msg("glusterd-ganesha", GF_LOG_ERROR, errno,
+ GD_MSG_DICT_GET_FAILED, "Unable to get volume name");
+ goto out;
+ }
+ ret = manage_export_config(volname, "off", errstr);
+ }
+ }
+out:
+ if (ret) {
+ gf_msg("glusterd-ganesha", GF_LOG_ERROR, 0,
+ GD_MSG_NFS_GNS_OP_HANDLE_FAIL,
+ "Handling NFS-Ganesha"
+ " op failed.");
+ }
+ return ret;
+}
+
+int
+glusterd_op_stage_set_ganesha(dict_t *dict, char **op_errstr)
+{
+ int ret = -1;
+ char *value = NULL;
+ char *str = NULL;
+ glusterd_conf_t *priv = NULL;
+ xlator_t *this = NULL;
+
+ GF_ASSERT(dict);
+ this = THIS;
+ GF_ASSERT(this);
+ priv = this->private;
+ GF_ASSERT(priv);
+
+ ret = dict_get_str(dict, "value", &value);
+ if (value == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
+ "value not present.");
+ goto out;
+ }
+ /* This dict_get will fail if the user had never set the key before */
+ /*Ignoring the ret value and proceeding */
+ ret = dict_get_str(priv->opts, GLUSTERD_STORE_KEY_GANESHA_GLOBAL, &str);
+ if (str ? strcmp(value, str) == 0 : strcmp(value, "disable") == 0) {
+ gf_asprintf(op_errstr, "nfs-ganesha is already %sd.", value);
+ ret = -1;
+ goto out;
+ }
+
+ if (strcmp(value, "enable") == 0) {
+ ret = start_ganesha(op_errstr);
+ if (ret) {
+ gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_NFS_GNS_START_FAIL,
+ "Could not start NFS-Ganesha");
+ }
+ } else {
+ ret = stop_ganesha(op_errstr);
+ if (ret)
+ gf_msg_debug(THIS->name, 0,
+ "Could not stop "
+ "NFS-Ganesha.");
+ }
+
+out:
+
+ if (ret) {
+ if (!(*op_errstr)) {
+ *op_errstr = gf_strdup("Error, Validation Failed");
+ gf_msg_debug(this->name, 0, "Error, Cannot Validate option :%s",
+ GLUSTERD_STORE_KEY_GANESHA_GLOBAL);
+ } else {
+ gf_msg_debug(this->name, 0, "Error, Cannot Validate option");
+ }
+ }
+ return ret;
+}
+
+int
+glusterd_op_set_ganesha(dict_t *dict, char **errstr)
+{
+ int ret = 0;
+ xlator_t *this = NULL;
+ glusterd_conf_t *priv = NULL;
+ char *key = NULL;
+ char *value = NULL;
+ char *next_version = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+ GF_ASSERT(dict);
+
+ priv = this->private;
+ GF_ASSERT(priv);
+
+ ret = dict_get_str(dict, "key", &key);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
+ "Couldn't get key in global option set");
+ goto out;
+ }
+
+ ret = dict_get_str(dict, "value", &value);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
+ "Couldn't get value in global option set");
+ goto out;
+ }
+
+ ret = glusterd_handle_ganesha_op(dict, errstr, key, value);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_NFS_GNS_SETUP_FAIL,
+ "Initial NFS-Ganesha set up failed");
+ ret = -1;
+ goto out;
+ }
+ ret = dict_set_dynstr_with_alloc(priv->opts,
+ GLUSTERD_STORE_KEY_GANESHA_GLOBAL, value);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, errno, GD_MSG_DICT_SET_FAILED,
+ "Failed to set"
+ " nfs-ganesha in dict.");
+ goto out;
+ }
+ ret = glusterd_get_next_global_opt_version_str(priv->opts, &next_version);
+ if (ret) {
+ gf_msg_debug(THIS->name, 0,
+ "Could not fetch "
+ " global op version");
+ goto out;
+ }
+ ret = dict_set_str(priv->opts, GLUSTERD_GLOBAL_OPT_VERSION, next_version);
+ if (ret)
+ goto out;
+
+ ret = glusterd_store_options(this, priv->opts);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_STORE_FAIL,
+ "Failed to store options");
+ goto out;
+ }
+
+out:
+ gf_msg_debug(this->name, 0, "returning %d", ret);
+ return ret;
+}
+
+/* The following function parses GANESHA_HA_CONF
+ * The sample file looks like below,
+ * HA_NAME="ganesha-ha-360"
+ * HA_VOL_NAME="ha-state"
+ * HA_CLUSTER_NODES="server1,server2"
+ * VIP_rhs_1="10.x.x.x"
+ * VIP_rhs_2="10.x.x.x" */
+
+/* Check if the localhost is listed as one of nfs-ganesha nodes */
+gf_boolean_t
+check_host_list(void)
+{
+ glusterd_conf_t *priv = NULL;
+ char *hostname, *hostlist;
+ gf_boolean_t ret = _gf_false;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ priv = THIS->private;
+ GF_ASSERT(priv);
+
+ hostlist = parsing_ganesha_ha_conf("HA_CLUSTER_NODES");
+ if (hostlist == NULL) {
+ gf_msg(this->name, GF_LOG_INFO, errno, GD_MSG_GET_CONFIG_INFO_FAILED,
+ "couldn't get HA_CLUSTER_NODES from file %s", GANESHA_HA_CONF);
+ return _gf_false;
+ }
+
+ /* Hostlist is a comma separated list now */
+ hostname = strtok(hostlist, ",");
+ while (hostname != NULL) {
+ ret = gf_is_local_addr(hostname);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_NFS_GNS_HOST_FOUND,
+ "ganesha host found "
+ "Hostname is %s",
+ hostname);
+ break;
+ }
+ hostname = strtok(NULL, ",");
+ }
+
+ GF_FREE(hostlist);
+ return ret;
+}
+
+int
+gd_ganesha_send_dbus(char *volname, char *value)
+{
+ runner_t runner = {
+ 0,
+ };
+ int ret = -1;
+ runinit(&runner);
+
+ GF_VALIDATE_OR_GOTO("glusterd-ganesha", volname, out);
+ GF_VALIDATE_OR_GOTO("glusterd-ganesha", value, out);
+
+ ret = 0;
+ if (check_host_list()) {
+ /* Check whether ganesha is running on this node */
+ if (manage_service("status")) {
+ gf_msg("glusterd-ganesha", GF_LOG_WARNING, 0,
+ GD_MSG_GANESHA_NOT_RUNNING,
+ "Export failed, NFS-Ganesha is not running");
+ } else {
+ runner_add_args(&runner, GANESHA_PREFIX "/dbus-send.sh", CONFDIR,
+ value, volname, NULL);
+ ret = runner_run(&runner);
+ }
+ }
+out:
+ return ret;
+}
+
+int
+manage_export_config(char *volname, char *value, char **op_errstr)
+{
+ runner_t runner = {
+ 0,
+ };
+ int ret = -1;
+
+ GF_ASSERT(volname);
+ runinit(&runner);
+ runner_add_args(&runner, GANESHA_PREFIX "/create-export-ganesha.sh",
+ CONFDIR, value, volname, NULL);
+ ret = runner_run(&runner);
+
+ if (ret && op_errstr)
+ gf_asprintf(op_errstr,
+ "Failed to create"
+ " NFS-Ganesha export config file.");
+
+ return ret;
+}
+
+/* Exports and unexports a particular volume via NFS-Ganesha */
+int
+ganesha_manage_export(dict_t *dict, char *value,
+ gf_boolean_t update_cache_invalidation, char **op_errstr)
+{
+ int ret = -1;
+ glusterd_volinfo_t *volinfo = NULL;
+ dict_t *vol_opts = NULL;
+ char *volname = NULL;
+ xlator_t *this = NULL;
+ glusterd_conf_t *priv = NULL;
+ gf_boolean_t option = _gf_false;
+
+ this = THIS;
+ GF_ASSERT(this);
+ priv = this->private;
+
+ GF_ASSERT(value);
+ GF_ASSERT(dict);
+ GF_ASSERT(priv);
+
+ ret = dict_get_str(dict, "volname", &volname);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
+ "Unable to get volume name");
+ goto out;
+ }
+ ret = gf_string2boolean(value, &option);
+ if (ret == -1) {
+ gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_INVALID_ENTRY,
+ "invalid value.");
+ goto out;
+ }
+
+ ret = glusterd_volinfo_find(volname, &volinfo);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_VOL_NOT_FOUND,
+ FMTSTR_CHECK_VOL_EXISTS, volname);
+ goto out;
+ }
+
+ ret = glusterd_check_ganesha_export(volinfo);
+ if (ret && option) {
+ gf_asprintf(op_errstr,
+ "ganesha.enable "
+ "is already 'on'.");
+ ret = -1;
+ goto out;
+
+ } else if (!option && !ret) {
+ gf_asprintf(op_errstr,
+ "ganesha.enable "
+ "is already 'off'.");
+ ret = -1;
+ goto out;
+ }
+
+ /* Check if global option is enabled, proceed only then */
+ ret = dict_get_str_boolean(priv->opts, GLUSTERD_STORE_KEY_GANESHA_GLOBAL,
+ _gf_false);
+ if (ret == -1) {
+ gf_msg_debug(this->name, 0,
+ "Failed to get "
+ "global option dict.");
+ gf_asprintf(op_errstr,
+ "The option "
+ "nfs-ganesha should be "
+ "enabled before setting ganesha.enable.");
+ goto out;
+ }
+ if (!ret) {
+ gf_asprintf(op_errstr,
+ "The option "
+ "nfs-ganesha should be "
+ "enabled before setting ganesha.enable.");
+ ret = -1;
+ goto out;
+ }
+
+ /* *
+ * Create the export file from the node where ganesha.enable "on"
+ * is executed
+ * */
+ if (option && is_origin_glusterd(dict)) {
+ ret = manage_export_config(volname, "on", op_errstr);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_EXPORT_FILE_CREATE_FAIL,
+ "Failed to create"
+ "export file for NFS-Ganesha\n");
+ goto out;
+ }
+ }
+ ret = gd_ganesha_send_dbus(volname, value);
+ if (ret) {
+ gf_asprintf(op_errstr,
+ "Dynamic export addition/deletion failed."
+ " Please see log file for details");
+ goto out;
+ }
+ if (update_cache_invalidation) {
+ vol_opts = volinfo->dict;
+ ret = dict_set_dynstr_with_alloc(vol_opts,
+ "features.cache-invalidation", value);
+ if (ret)
+ gf_asprintf(op_errstr,
+ "Cache-invalidation could not"
+ " be set to %s.",
+ value);
+ ret = glusterd_store_volinfo(volinfo,
+ GLUSTERD_VOLINFO_VER_AC_INCREMENT);
+ if (ret)
+ gf_asprintf(op_errstr, "failed to store volinfo for %s",
+ volinfo->volname);
+ }
+out:
+ return ret;
+}
+
+int
+tear_down_cluster(gf_boolean_t run_teardown)
+{
+ int ret = 0;
+ runner_t runner = {
+ 0,
+ };
+ struct stat st = {
+ 0,
+ };
+ DIR *dir = NULL;
+ struct dirent *entry = NULL;
+ struct dirent scratch[2] = {
+ {
+ 0,
+ },
+ };
+ char path[PATH_MAX] = {
+ 0,
+ };
+
+ if (run_teardown) {
+ runinit(&runner);
+ runner_add_args(&runner, GANESHA_PREFIX "/ganesha-ha.sh", "teardown",
+ CONFDIR, NULL);
+ ret = runner_run(&runner);
+ /* *
+ * Remove all the entries in CONFDIR except ganesha.conf and
+ * ganesha-ha.conf
+ */
+ dir = sys_opendir(CONFDIR);
+ if (!dir) {
+ gf_msg_debug(THIS->name, 0,
+ "Failed to open directory %s. "
+ "Reason : %s",
+ CONFDIR, strerror(errno));
+ ret = 0;
+ goto out;
+ }
+
+ while ((entry = sys_readdir(dir, scratch))) {
+ if (gf_irrelevant_entry(entry))
+ continue;
+ snprintf(path, PATH_MAX, "%s/%s", CONFDIR, entry->d_name);
+ ret = sys_lstat(path, &st);
+ if (ret == -1) {
+ gf_msg_debug(THIS->name, 0,
+ "Failed to stat entry %s :"
+ " %s",
+ path, strerror(errno));
+ goto out;
+ }
+
+ if (strcmp(entry->d_name, "ganesha.conf") == 0 ||
+ strcmp(entry->d_name, "ganesha-ha.conf") == 0)
+ gf_msg_debug(THIS->name, 0,
+ " %s is not required"
+ " to remove",
+ path);
+ else if (S_ISDIR(st.st_mode))
+ ret = recursive_rmdir(path);
+ else
+ ret = sys_unlink(path);
+
+ if (ret) {
+ gf_msg_debug(THIS->name, 0,
+ " Failed to remove %s. "
+ "Reason : %s",
+ path, strerror(errno));
+ }
+
+ gf_msg_debug(THIS->name, 0, "%s %s",
+ ret ? "Failed to remove" : "Removed", entry->d_name);
+ }
+
+ ret = sys_closedir(dir);
+ if (ret) {
+ gf_msg_debug(THIS->name, 0,
+ "Failed to close dir %s. Reason :"
+ " %s",
+ CONFDIR, strerror(errno));
+ }
+ goto exit;
+ }
+
+out:
+ if (dir && sys_closedir(dir)) {
+ gf_msg_debug(THIS->name, 0,
+ "Failed to close dir %s. Reason :"
+ " %s",
+ CONFDIR, strerror(errno));
+ }
+exit:
+ return ret;
+}
+
+int
+setup_cluster(gf_boolean_t run_setup)
+{
+ int ret = 0;
+ runner_t runner = {
+ 0,
+ };
+
+ if (run_setup) {
+ runinit(&runner);
+ runner_add_args(&runner, GANESHA_PREFIX "/ganesha-ha.sh", "setup",
+ CONFDIR, NULL);
+ ret = runner_run(&runner);
+ }
+ return ret;
+}
+
+static int
+teardown(gf_boolean_t run_teardown, char **op_errstr)
+{
+ runner_t runner = {
+ 0,
+ };
+ int ret = 1;
+ glusterd_volinfo_t *volinfo = NULL;
+ glusterd_conf_t *priv = NULL;
+ dict_t *vol_opts = NULL;
+
+ priv = THIS->private;
+
+ ret = tear_down_cluster(run_teardown);
+ if (ret == -1) {
+ gf_asprintf(op_errstr,
+ "Cleanup of NFS-Ganesha"
+ " HA config failed.");
+ goto out;
+ }
+
+ runinit(&runner);
+ runner_add_args(&runner, GANESHA_PREFIX "/ganesha-ha.sh", "cleanup",
+ CONFDIR, NULL);
+ ret = runner_run(&runner);
+ if (ret)
+ gf_msg_debug(THIS->name, 0,
+ "Could not clean up"
+ " NFS-Ganesha related config");
+
+ cds_list_for_each_entry(volinfo, &priv->volumes, vol_list)
+ {
+ vol_opts = volinfo->dict;
+ /* All the volumes exported via NFS-Ganesha will be
+ unexported, hence setting the appropriate keys */
+ ret = dict_set_str(vol_opts, "features.cache-invalidation", "off");
+ if (ret)
+ gf_msg(THIS->name, GF_LOG_WARNING, errno, GD_MSG_DICT_SET_FAILED,
+ "Could not set features.cache-invalidation "
+ "to off for %s",
+ volinfo->volname);
+
+ ret = dict_set_str(vol_opts, "ganesha.enable", "off");
+ if (ret)
+ gf_msg(THIS->name, GF_LOG_WARNING, errno, GD_MSG_DICT_SET_FAILED,
+ "Could not set ganesha.enable to off for %s",
+ volinfo->volname);
+
+ ret = glusterd_store_volinfo(volinfo,
+ GLUSTERD_VOLINFO_VER_AC_INCREMENT);
+ if (ret)
+ gf_msg(THIS->name, GF_LOG_WARNING, 0, GD_MSG_VOLINFO_SET_FAIL,
+ "failed to store volinfo for %s", volinfo->volname);
+ }
+out:
+ return ret;
+}
+
+int
+stop_ganesha(char **op_errstr)
+{
+ int ret = 0;
+ runner_t runner = {
+ 0,
+ };
+
+ if (check_host_list()) {
+ runinit(&runner);
+ runner_add_args(&runner, GANESHA_PREFIX "/ganesha-ha.sh",
+ "--setup-ganesha-conf-files", CONFDIR, "no", NULL);
+ ret = runner_run(&runner);
+ if (ret) {
+ gf_asprintf(op_errstr,
+ "removal of symlink ganesha.conf "
+ "in /etc/ganesha failed");
+ }
+ ret = manage_service("stop");
+ if (ret)
+ gf_asprintf(op_errstr,
+ "NFS-Ganesha service could not"
+ "be stopped.");
+ }
+ return ret;
+}
+
+int
+start_ganesha(char **op_errstr)
+{
+ int ret = -1;
+ glusterd_volinfo_t *volinfo = NULL;
+ glusterd_conf_t *priv = NULL;
+ runner_t runner = {
+ 0,
+ };
+
+ priv = THIS->private;
+ GF_ASSERT(priv);
+
+ cds_list_for_each_entry(volinfo, &priv->volumes, vol_list)
+ {
+#ifdef BUILD_GNFS
+ /* Gluster-nfs has to be disabled across the trusted pool */
+ /* before attempting to start nfs-ganesha */
+ ret = dict_set_str_sizen(volinfo->dict, NFS_DISABLE_MAP_KEY, "on");
+ if (ret)
+ goto out;
+#endif
+ ret = glusterd_store_volinfo(volinfo,
+ GLUSTERD_VOLINFO_VER_AC_INCREMENT);
+ if (ret) {
+ *op_errstr = gf_strdup(
+ "Failed to store the "
+ "Volume information");
+ goto out;
+ }
+ }
+
+ /* If the nfs svc is not initialized it means that the service is not
+ * running, hence we can skip the process of stopping gluster-nfs
+ * service
+ */
+#ifdef BUILD_GNFS
+ if (priv->nfs_svc.inited) {
+ ret = priv->nfs_svc.stop(&(priv->nfs_svc), SIGKILL);
+ if (ret) {
+ ret = -1;
+ gf_asprintf(op_errstr,
+ "Gluster-NFS service could"
+ "not be stopped, exiting.");
+ goto out;
+ }
+ }
+#endif
+
+ if (check_host_list()) {
+ runinit(&runner);
+ runner_add_args(&runner, GANESHA_PREFIX "/ganesha-ha.sh",
+ "--setup-ganesha-conf-files", CONFDIR, "yes", NULL);
+ ret = runner_run(&runner);
+ if (ret) {
+ gf_asprintf(op_errstr,
+ "creation of symlink ganesha.conf "
+ "in /etc/ganesha failed");
+ goto out;
+ }
+ ret = manage_service("start");
+ if (ret)
+ gf_asprintf(op_errstr,
+ "NFS-Ganesha failed to start."
+ "Please see log file for details");
+ }
+
+out:
+ return ret;
+}
+
+static int
+pre_setup(gf_boolean_t run_setup, char **op_errstr)
+{
+ int ret = 0;
+ if (run_setup) {
+ if (!check_host_list()) {
+ gf_asprintf(op_errstr,
+ "Running nfs-ganesha setup command "
+ "from node which is not part of ganesha cluster");
+ return -1;
+ }
+ }
+ ret = setup_cluster(run_setup);
+ if (ret == -1)
+ gf_asprintf(op_errstr,
+ "Failed to set up HA "
+ "config for NFS-Ganesha. "
+ "Please check the log file for details");
+ return ret;
+}
+
+int
+glusterd_handle_ganesha_op(dict_t *dict, char **op_errstr, char *key,
+ char *value)
+{
+ int32_t ret = -1;
+ gf_boolean_t option = _gf_false;
+
+ GF_ASSERT(dict);
+ GF_ASSERT(op_errstr);
+ GF_ASSERT(key);
+ GF_ASSERT(value);
+
+ if (strcmp(key, "ganesha.enable") == 0) {
+ ret = ganesha_manage_export(dict, value, _gf_true, op_errstr);
+ if (ret < 0)
+ goto out;
+ }
+
+ /* It is possible that the key might not be set */
+ ret = gf_string2boolean(value, &option);
+ if (ret == -1) {
+ gf_asprintf(op_errstr, "Invalid value in key-value pair.");
+ goto out;
+ }
+
+ if (strcmp(key, GLUSTERD_STORE_KEY_GANESHA_GLOBAL) == 0) {
+ /* *
+ * The setup/teardown of the pcs cluster should be performed only
+ * once. This will be done on the node on which the cli command
+ * 'gluster nfs-ganesha <enable/disable>' was executed, so that
+ * node should be part of the ganesha HA cluster.
+ */
+ if (option) {
+ ret = pre_setup(is_origin_glusterd(dict), op_errstr);
+ if (ret < 0)
+ goto out;
+ } else {
+ ret = teardown(is_origin_glusterd(dict), op_errstr);
+ if (ret < 0)
+ goto out;
+ }
+ }
+
+out:
+ return ret;
+}
diff --git a/xlators/mgmt/glusterd/src/glusterd-geo-rep.c b/xlators/mgmt/glusterd/src/glusterd-geo-rep.c
index 1aa38686d6f..bf062c87060 100644
--- a/xlators/mgmt/glusterd/src/glusterd-geo-rep.c
+++ b/xlators/mgmt/glusterd/src/glusterd-geo-rep.c
@@ -7,7 +7,7 @@
later), or the GNU General Public License, version 2 (GPLv2), in all
cases as published by the Free Software Foundation.
*/
-#include "common-utils.h"
+#include <glusterfs/common-utils.h>
#include "cli1-xdr.h"
#include "xdr-generic.h"
#include "glusterd.h"
@@ -17,8 +17,8 @@
#include "glusterd-utils.h"
#include "glusterd-volgen.h"
#include "glusterd-svc-helper.h"
-#include "run.h"
-#include "syscall.h"
+#include <glusterfs/run.h>
+#include <glusterfs/syscall.h>
#include "glusterd-messages.h"
#include <signal.h>
@@ -76,6 +76,19 @@ static char *gsync_reserved_opts[] = {"gluster-command",
static char *gsync_no_restart_opts[] = {"checkpoint", "log_rsync_performance",
"log-rsync-performance", NULL};
+/*
+ * Append "--<value>" to the runner's argument list, where <value> is the
+ * "transport.address-family" option read from the options dict of the
+ * current xlator (THIS). Nothing is appended when the lookup fails.
+ */
+void
+set_gsyncd_inet6_arg(runner_t *runner)
+{
+    char *family = NULL;
+
+    if (dict_get_str(THIS->options, "transport.address-family", &family) != 0)
+        return;
+
+    runner_argprintf(runner, "--%s", family);
+}
+
int
__glusterd_handle_sys_exec(rpcsvc_request_t *req)
{
@@ -102,13 +115,18 @@ __glusterd_handle_sys_exec(rpcsvc_request_t *req)
ret = xdr_to_generic(req->msg[0], &cli_req, (xdrproc_t)xdr_gf_cli_req);
if (ret < 0) {
req->rpc_err = GARBAGE_ARGS;
+ snprintf(err_str, sizeof(err_str), "Garbage args received");
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_GARBAGE_ARGS, NULL);
goto out;
}
if (cli_req.dict.dict_len) {
dict = dict_new();
- if (!dict)
+ if (!dict) {
+ gf_smsg(THIS->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL,
+ NULL);
goto out;
+ }
ret = dict_unserialize(cli_req.dict.dict_val, cli_req.dict.dict_len,
&dict);
@@ -129,13 +147,18 @@ __glusterd_handle_sys_exec(rpcsvc_request_t *req)
snprintf(err_str, sizeof(err_str),
"Failed to get "
"the uuid of local glusterd");
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_UUID_GET_FAIL,
+ NULL);
ret = -1;
goto out;
}
ret = dict_set_dynstr(dict, "host-uuid", host_uuid);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=host-uuid", NULL);
goto out;
+ }
}
ret = glusterd_op_begin_synctask(req, cli_op, dict);
@@ -175,13 +198,18 @@ __glusterd_handle_copy_file(rpcsvc_request_t *req)
ret = xdr_to_generic(req->msg[0], &cli_req, (xdrproc_t)xdr_gf_cli_req);
if (ret < 0) {
req->rpc_err = GARBAGE_ARGS;
+ snprintf(err_str, sizeof(err_str), "Garbage args received");
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_GARBAGE_ARGS, NULL);
goto out;
}
if (cli_req.dict.dict_len) {
dict = dict_new();
- if (!dict)
+ if (!dict) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL,
+ NULL);
goto out;
+ }
ret = dict_unserialize(cli_req.dict.dict_val, cli_req.dict.dict_len,
&dict);
@@ -202,6 +230,8 @@ __glusterd_handle_copy_file(rpcsvc_request_t *req)
snprintf(err_str, sizeof(err_str),
"Failed to get "
"the uuid of local glusterd");
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_UUID_GET_FAIL,
+ NULL);
ret = -1;
goto out;
}
@@ -254,13 +284,18 @@ __glusterd_handle_gsync_set(rpcsvc_request_t *req)
ret = xdr_to_generic(req->msg[0], &cli_req, (xdrproc_t)xdr_gf_cli_req);
if (ret < 0) {
req->rpc_err = GARBAGE_ARGS;
+ snprintf(err_str, sizeof(err_str), "Garbage args received");
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_GARBAGE_ARGS, NULL);
goto out;
}
if (cli_req.dict.dict_len) {
dict = dict_new();
- if (!dict)
+ if (!dict) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL,
+ NULL);
goto out;
+ }
ret = dict_unserialize(cli_req.dict.dict_val, cli_req.dict.dict_len,
&dict);
@@ -281,6 +316,8 @@ __glusterd_handle_gsync_set(rpcsvc_request_t *req)
snprintf(err_str, sizeof(err_str),
"Failed to get "
"the uuid of local glusterd");
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_UUID_GET_FAIL,
+ NULL);
ret = -1;
goto out;
}
@@ -384,6 +421,7 @@ glusterd_urltransform_init(runner_t *runner, const char *transname)
{
runinit(runner);
runner_add_arg(runner, GSYNCD_PREFIX "/gsyncd");
+ set_gsyncd_inet6_arg(runner);
runner_argprintf(runner, "--%s-url", transname);
}
@@ -725,6 +763,7 @@ glusterd_get_slave_voluuid(char *slave_host, char *slave_vol, char *vol_uuid)
runinit(&runner);
runner_add_arg(&runner, GSYNCD_PREFIX "/gsyncd");
+ set_gsyncd_inet6_arg(&runner);
runner_add_arg(&runner, "--slavevoluuid-get");
runner_argprintf(&runner, "%s::%s", slave_host, slave_vol);
@@ -788,6 +827,7 @@ glusterd_gsync_get_config(char *master, char *slave, char *conf_path,
runinit(&runner);
runner_add_args(&runner, GSYNCD_PREFIX "/gsyncd", "-c", NULL);
runner_argprintf(&runner, "%s", conf_path);
+ set_gsyncd_inet6_arg(&runner);
runner_argprintf(&runner, "--iprefix=%s", DATADIR);
runner_argprintf(&runner, ":%s", master);
runner_add_args(&runner, slave, "--config-get-all", NULL);
@@ -803,6 +843,7 @@ _fcbk_statustostruct(char *resbuf, size_t blen, FILE *fp, void *data)
char *v = NULL;
char *k = NULL;
gf_gsync_status_t *sts_val = NULL;
+ size_t len = 0;
sts_val = (gf_gsync_status_t *)data;
@@ -836,47 +877,63 @@ _fcbk_statustostruct(char *resbuf, size_t blen, FILE *fp, void *data)
}
if (strcmp(k, "worker_status") == 0) {
- memcpy(sts_val->worker_status, v, strlen(v));
- sts_val->worker_status[strlen(v)] = '\0';
+ len = min(strlen(v), (sizeof(sts_val->worker_status) - 1));
+ memcpy(sts_val->worker_status, v, len);
+ sts_val->worker_status[len] = '\0';
} else if (strcmp(k, "slave_node") == 0) {
- memcpy(sts_val->slave_node, v, strlen(v));
- sts_val->slave_node[strlen(v)] = '\0';
+ len = min(strlen(v), (sizeof(sts_val->slave_node) - 1));
+ memcpy(sts_val->slave_node, v, len);
+ sts_val->slave_node[len] = '\0';
} else if (strcmp(k, "crawl_status") == 0) {
- memcpy(sts_val->crawl_status, v, strlen(v));
- sts_val->crawl_status[strlen(v)] = '\0';
+ len = min(strlen(v), (sizeof(sts_val->crawl_status) - 1));
+ memcpy(sts_val->crawl_status, v, len);
+ sts_val->crawl_status[len] = '\0';
} else if (strcmp(k, "last_synced") == 0) {
- memcpy(sts_val->last_synced, v, strlen(v));
- sts_val->last_synced[strlen(v)] = '\0';
+ len = min(strlen(v), (sizeof(sts_val->last_synced) - 1));
+ memcpy(sts_val->last_synced, v, len);
+ sts_val->last_synced[len] = '\0';
} else if (strcmp(k, "last_synced_utc") == 0) {
- memcpy(sts_val->last_synced_utc, v, strlen(v));
- sts_val->last_synced_utc[strlen(v)] = '\0';
+ len = min(strlen(v), (sizeof(sts_val->last_synced_utc) - 1));
+ memcpy(sts_val->last_synced_utc, v, len);
+ sts_val->last_synced_utc[len] = '\0';
} else if (strcmp(k, "entry") == 0) {
- memcpy(sts_val->entry, v, strlen(v));
- sts_val->entry[strlen(v)] = '\0';
+ len = min(strlen(v), (sizeof(sts_val->entry) - 1));
+ memcpy(sts_val->entry, v, len);
+ sts_val->entry[len] = '\0';
} else if (strcmp(k, "data") == 0) {
- memcpy(sts_val->data, v, strlen(v));
- sts_val->data[strlen(v)] = '\0';
+ len = min(strlen(v), (sizeof(sts_val->data) - 1));
+ memcpy(sts_val->data, v, len);
+ sts_val->data[len] = '\0';
} else if (strcmp(k, "meta") == 0) {
- memcpy(sts_val->meta, v, strlen(v));
- sts_val->meta[strlen(v)] = '\0';
+ len = min(strlen(v), (sizeof(sts_val->meta) - 1));
+ memcpy(sts_val->meta, v, len);
+ sts_val->meta[len] = '\0';
} else if (strcmp(k, "failures") == 0) {
- memcpy(sts_val->failures, v, strlen(v));
- sts_val->failures[strlen(v)] = '\0';
+ len = min(strlen(v), (sizeof(sts_val->failures) - 1));
+ memcpy(sts_val->failures, v, len);
+ sts_val->failures[len] = '\0';
} else if (strcmp(k, "checkpoint_time") == 0) {
- memcpy(sts_val->checkpoint_time, v, strlen(v));
- sts_val->checkpoint_time[strlen(v)] = '\0';
+ len = min(strlen(v), (sizeof(sts_val->checkpoint_time) - 1));
+ memcpy(sts_val->checkpoint_time, v, len);
+ sts_val->checkpoint_time[len] = '\0';
} else if (strcmp(k, "checkpoint_time_utc") == 0) {
- memcpy(sts_val->checkpoint_time_utc, v, strlen(v));
- sts_val->checkpoint_time_utc[strlen(v)] = '\0';
+ len = min(strlen(v), (sizeof(sts_val->checkpoint_time_utc) - 1));
+ memcpy(sts_val->checkpoint_time_utc, v, len);
+ sts_val->checkpoint_time_utc[len] = '\0';
} else if (strcmp(k, "checkpoint_completed") == 0) {
- memcpy(sts_val->checkpoint_completed, v, strlen(v));
- sts_val->checkpoint_completed[strlen(v)] = '\0';
+ len = min(strlen(v), (sizeof(sts_val->checkpoint_completed) - 1));
+ memcpy(sts_val->checkpoint_completed, v, len);
+ sts_val->checkpoint_completed[len] = '\0';
} else if (strcmp(k, "checkpoint_completion_time") == 0) {
- memcpy(sts_val->checkpoint_completion_time, v, strlen(v));
- sts_val->checkpoint_completion_time[strlen(v)] = '\0';
+ len = min(strlen(v),
+ (sizeof(sts_val->checkpoint_completion_time) - 1));
+ memcpy(sts_val->checkpoint_completion_time, v, len);
+ sts_val->checkpoint_completion_time[len] = '\0';
} else if (strcmp(k, "checkpoint_completion_time_utc") == 0) {
- memcpy(sts_val->checkpoint_completion_time_utc, v, strlen(v));
- sts_val->checkpoint_completion_time_utc[strlen(v)] = '\0';
+ len = min(strlen(v),
+ (sizeof(sts_val->checkpoint_completion_time_utc) - 1));
+ memcpy(sts_val->checkpoint_completion_time_utc, v, len);
+ sts_val->checkpoint_completion_time_utc[len] = '\0';
}
GF_FREE(v);
GF_FREE(k);
@@ -900,6 +957,7 @@ glusterd_gsync_get_status(char *master, char *slave, char *conf_path,
runinit(&runner);
runner_add_args(&runner, GSYNCD_PREFIX "/gsyncd", "-c", NULL);
runner_argprintf(&runner, "%s", conf_path);
+ set_gsyncd_inet6_arg(&runner);
runner_argprintf(&runner, "--iprefix=%s", DATADIR);
runner_argprintf(&runner, ":%s", master);
runner_add_args(&runner, slave, "--status-get", NULL);
@@ -920,6 +978,7 @@ glusterd_gsync_get_param_file(char *prmfile, const char *param, char *master,
runinit(&runner);
runner_add_args(&runner, GSYNCD_PREFIX "/gsyncd", "-c", NULL);
runner_argprintf(&runner, "%s", conf_path);
+ set_gsyncd_inet6_arg(&runner);
runner_argprintf(&runner, "--iprefix=%s", DATADIR);
runner_argprintf(&runner, ":%s", master);
runner_add_args(&runner, slave, "--config-get", NULL);
@@ -1695,9 +1754,10 @@ glusterd_store_slave_in_info(glusterd_volinfo_t *volinfo, char *slave,
char *value = NULL;
char *slavekey = NULL;
char *slaveentry = NULL;
- char key[512] = {
+ char key[32] = {
0,
};
+ int keylen;
char *t = NULL;
xlator_t *this = NULL;
struct slave_vol_config slave1 = {
@@ -1775,15 +1835,15 @@ glusterd_store_slave_in_info(glusterd_volinfo_t *volinfo, char *slave,
if (ret == 0) { /* New slave */
dict_foreach(volinfo->gsync_slaves, _get_max_gsync_slave_num, &maxslv);
- snprintf(key, sizeof(key), "slave%d", maxslv + 1);
+ keylen = snprintf(key, sizeof(key), "slave%d", maxslv + 1);
- ret = dict_set_dynstr(volinfo->gsync_slaves, key, value);
+ ret = dict_set_dynstrn(volinfo->gsync_slaves, key, keylen, value);
if (ret) {
GF_FREE(value);
goto out;
}
} else if (ret == -1) { /* Existing slave */
- snprintf(key, sizeof(key), "slave%d", slave1.old_slvidx);
+ keylen = snprintf(key, sizeof(key), "slave%d", slave1.old_slvidx);
gf_msg_debug(this->name, 0,
"Replacing key:%s with new value"
@@ -1791,7 +1851,7 @@ glusterd_store_slave_in_info(glusterd_volinfo_t *volinfo, char *slave,
key, value);
/* Add new slave's value, with the same slave index */
- ret = dict_set_dynstr(volinfo->gsync_slaves, key, value);
+ ret = dict_set_dynstrn(volinfo->gsync_slaves, key, keylen, value);
if (ret) {
GF_FREE(value);
goto out;
@@ -2215,6 +2275,9 @@ glusterd_op_verify_gsync_running(glusterd_volinfo_t *volinfo, char *slave,
"Volume %s needs to be started "
"before " GEOREP " start",
volinfo->volname);
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_GEO_REP_START_FAILED,
+ "Volume is not in a started state, Volname=%s",
+ volinfo->volname, NULL);
goto out;
}
@@ -2271,7 +2334,6 @@ glusterd_verify_gsync_status_opts(dict_t *dict, char **op_errstr)
char errmsg[PATH_MAX] = {
0,
};
- gf_boolean_t exists = _gf_false;
glusterd_volinfo_t *volinfo = NULL;
int ret = 0;
char *conf_path = NULL;
@@ -2299,9 +2361,8 @@ glusterd_verify_gsync_status_opts(dict_t *dict, char **op_errstr)
goto out;
}
- exists = glusterd_check_volume_exists(volname);
ret = glusterd_volinfo_find(volname, &volinfo);
- if ((ret) || (!exists)) {
+ if (ret) {
gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_VOL_NOT_FOUND,
"volume name does not exist");
snprintf(errmsg, sizeof(errmsg),
@@ -2309,7 +2370,6 @@ glusterd_verify_gsync_status_opts(dict_t *dict, char **op_errstr)
" exist",
volname);
*op_errstr = gf_strdup(errmsg);
- ret = -1;
goto out;
}
@@ -2522,6 +2582,7 @@ glusterd_op_stage_copy_file(dict_t *dict, char **op_errstr)
len = snprintf(abs_filename, sizeof(abs_filename), "%s/%s",
priv->workdir, filename);
if ((len < 0) || (len >= sizeof(abs_filename))) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_COPY_FAIL, NULL);
ret = -1;
goto out;
}
@@ -2534,6 +2595,9 @@ glusterd_op_stage_copy_file(dict_t *dict, char **op_errstr)
if (len < 0) {
strcpy(errmsg, "<error>");
}
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_REALPATH_GET_FAIL,
+ "Realpath=%s, Reason=%s", priv->workdir, strerror(errno),
+ NULL);
*op_errstr = gf_strdup(errmsg);
ret = -1;
goto out;
@@ -2544,6 +2608,8 @@ glusterd_op_stage_copy_file(dict_t *dict, char **op_errstr)
"Failed to get "
"realpath of %s: %s",
filename, strerror(errno));
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_REALPATH_GET_FAIL,
+ "Filename=%s, Reason=%s", filename, strerror(errno), NULL);
*op_errstr = gf_strdup(errmsg);
ret = -1;
goto out;
@@ -2553,6 +2619,7 @@ glusterd_op_stage_copy_file(dict_t *dict, char **op_errstr)
will succeed for /var/lib/glusterd_bad */
len = snprintf(workdir, sizeof(workdir), "%s/", realpath_workdir);
if ((len < 0) || (len >= sizeof(workdir))) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_COPY_FAIL, NULL);
ret = -1;
goto out;
}
@@ -2566,6 +2633,8 @@ glusterd_op_stage_copy_file(dict_t *dict, char **op_errstr)
if (len < 0) {
strcpy(errmsg, "<error>");
}
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_SRC_FILE_ERROR, errmsg,
+ NULL);
*op_errstr = gf_strdup(errmsg);
ret = -1;
goto out;
@@ -2580,6 +2649,8 @@ glusterd_op_stage_copy_file(dict_t *dict, char **op_errstr)
if (len < 0) {
strcpy(errmsg, "<error>");
}
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_SRC_FILE_ERROR, errmsg,
+ NULL);
*op_errstr = gf_strdup(errmsg);
goto out;
}
@@ -2588,9 +2659,9 @@ glusterd_op_stage_copy_file(dict_t *dict, char **op_errstr)
snprintf(errmsg, sizeof(errmsg),
"Source file"
" is not a regular file.");
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_SRC_FILE_ERROR, errmsg,
+ NULL);
*op_errstr = gf_strdup(errmsg);
- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SRC_FILE_ERROR, "%s",
- errmsg);
ret = -1;
goto out;
}
@@ -2794,6 +2865,7 @@ glusterd_verify_slave(char *volname, char *slave_url, char *slave_vol,
char *slave_ip = NULL;
glusterd_conf_t *priv = NULL;
xlator_t *this = NULL;
+ char *af = NULL;
this = THIS;
GF_ASSERT(this);
@@ -2808,8 +2880,11 @@ glusterd_verify_slave(char *volname, char *slave_url, char *slave_vol,
*/
if (strstr(slave_url, "@")) {
slave_url_buf = gf_strdup(slave_url);
- if (!slave_url_buf)
+ if (!slave_url_buf) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_STRDUP_FAILED,
+ "Slave_url=%s", slave_url, NULL);
goto out;
+ }
slave_user = strtok_r(slave_url_buf, "@", &save_ptr);
slave_ip = strtok_r(NULL, "@", &save_ptr);
@@ -2824,8 +2899,8 @@ glusterd_verify_slave(char *volname, char *slave_url, char *slave_vol,
goto out;
}
- snprintf(log_file_path, sizeof(log_file_path),
- DEFAULT_LOG_FILE_DIRECTORY "/create_verify_log");
+ snprintf(log_file_path, sizeof(log_file_path), "%s/create_verify_log",
+ priv->logdir);
runinit(&runner);
runner_add_args(&runner, GSYNCD_PREFIX "/gverify.sh", NULL);
@@ -2835,9 +2910,16 @@ glusterd_verify_slave(char *volname, char *slave_url, char *slave_vol,
runner_argprintf(&runner, "%s", slave_vol);
runner_argprintf(&runner, "%d", ssh_port);
runner_argprintf(&runner, "%s", log_file_path);
- gf_msg_debug(this->name, 0, "gverify Args = %s %s %s %s %s %s %s",
+ ret = dict_get_str(this->options, "transport.address-family", &af);
+ if (ret)
+ af = "-";
+
+ runner_argprintf(&runner, "%s", af);
+
+ gf_msg_debug(this->name, 0, "gverify Args = %s %s %s %s %s %s %s %s",
runner.argv[0], runner.argv[1], runner.argv[2], runner.argv[3],
- runner.argv[4], runner.argv[5], runner.argv[6]);
+ runner.argv[4], runner.argv[5], runner.argv[6],
+ runner.argv[7]);
runner_redir(&runner, STDOUT_FILENO, RUN_PIPE);
synclock_unlock(&priv->big_lock);
ret = runner_run(&runner);
@@ -3097,7 +3179,6 @@ glusterd_op_stage_gsync_create(dict_t *dict, char **op_errstr)
gf_boolean_t is_force = -1;
gf_boolean_t is_no_verify = -1;
gf_boolean_t is_force_blocker = -1;
- gf_boolean_t exists = _gf_false;
gf_boolean_t is_template_in_use = _gf_false;
glusterd_conf_t *conf = NULL;
glusterd_volinfo_t *volinfo = NULL;
@@ -3147,18 +3228,15 @@ glusterd_op_stage_gsync_create(dict_t *dict, char **op_errstr)
goto out;
}
- exists = glusterd_check_volume_exists(volname);
ret = glusterd_volinfo_find(volname, &volinfo);
- if ((ret) || (!exists)) {
+ if (ret) {
gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_VOL_NOT_FOUND,
"volume name does not exist");
snprintf(errmsg, sizeof(errmsg),
"Volume name %s does not"
" exist",
volname);
- *op_errstr = gf_strdup(errmsg);
- gf_msg_debug(this->name, 0, "Returning %d", ret);
- return -1;
+ goto out;
}
ret = glusterd_get_slave_details_confpath(volinfo, dict, &slave_url,
@@ -3399,6 +3477,12 @@ glusterd_op_stage_gsync_create(dict_t *dict, char **op_errstr)
goto out;
}
+ /* There is a remote possibility that slave_host can be NULL when
+ control reaches here. Add a check so we wouldn't crash in next
+ line */
+ if (!slave_host)
+ goto out;
+
/* Now, check whether session is already started.If so, warn!*/
is_different_slavehost = (strcmp(slave_host, slave1.old_slvhost) != 0)
? _gf_true
@@ -3493,7 +3577,6 @@ out:
if (slave_url_buf)
GF_FREE(slave_url_buf);
- gf_msg_debug(this->name, 0, "Returning %d", ret);
return ret;
}
@@ -3572,7 +3655,6 @@ glusterd_op_stage_gsync_set(dict_t *dict, char **op_errstr)
char *statedir = NULL;
char *path_list = NULL;
char *conf_path = NULL;
- gf_boolean_t exists = _gf_false;
glusterd_volinfo_t *volinfo = NULL;
char errmsg[PATH_MAX] = {
0,
@@ -3623,14 +3705,12 @@ glusterd_op_stage_gsync_set(dict_t *dict, char **op_errstr)
goto out;
}
- exists = glusterd_check_volume_exists(volname);
ret = glusterd_volinfo_find(volname, &volinfo);
- if ((ret) || (!exists)) {
+ if (ret) {
snprintf(errmsg, sizeof(errmsg),
"Volume name %s does not"
" exist",
volname);
- ret = -1;
goto out;
}
@@ -4064,6 +4144,7 @@ gd_pause_or_resume_gsync(dict_t *dict, char *master, char *slave,
out:
sys_close(pfd);
+ /* coverity[INTEGER_OVERFLOW] */
return ret;
}
@@ -4128,10 +4209,10 @@ stop_gsync(char *master, char *slave, char **msg, char *conf_path,
* still be alive, give some more time
* before SIGKILL (hack)
*/
- usleep(50000);
+ gf_nanosleep(50000 * GF_US_IN_NS);
break;
}
- usleep(50000);
+ gf_nanosleep(50000 * GF_US_IN_NS);
}
kill(-pid, SIGKILL);
sys_unlink(pidfile);
@@ -4140,7 +4221,7 @@ stop_gsync(char *master, char *slave, char **msg, char *conf_path,
out:
sys_close(pfd);
-
+ /* coverity[INTEGER_OVERFLOW] */
return ret;
}
@@ -5075,7 +5156,6 @@ glusterd_get_gsync_status(dict_t *dict, char **op_errstr, dict_t *rsp_dict)
char errmsg[PATH_MAX] = {
0,
};
- gf_boolean_t exists = _gf_false;
glusterd_volinfo_t *volinfo = NULL;
int ret = 0;
char my_hostname[256] = {
@@ -5098,9 +5178,8 @@ glusterd_get_gsync_status(dict_t *dict, char **op_errstr, dict_t *rsp_dict)
goto out;
}
- exists = glusterd_check_volume_exists(volname);
ret = glusterd_volinfo_find(volname, &volinfo);
- if ((ret) || (!exists)) {
+ if (ret) {
gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_VOL_NOT_FOUND,
"volume name does not exist");
snprintf(errmsg, sizeof(errmsg),
@@ -5108,7 +5187,6 @@ glusterd_get_gsync_status(dict_t *dict, char **op_errstr, dict_t *rsp_dict)
" exist",
volname);
*op_errstr = gf_strdup(errmsg);
- ret = -1;
goto out;
}
@@ -5938,7 +6016,7 @@ glusterd_get_slave_info(char *slave, char **slave_url, char **hostname,
GF_ASSERT(this);
ret = glusterd_urltransform_single(slave, "normalize", &linearr);
- if (ret == -1) {
+ if ((ret == -1) || (linearr[0] == NULL)) {
ret = snprintf(errmsg, sizeof(errmsg) - 1, "Invalid Url: %s", slave);
errmsg[ret] = '\0';
*op_errstr = gf_strdup(errmsg);
@@ -5949,7 +6027,10 @@ glusterd_get_slave_info(char *slave, char **slave_url, char **hostname,
tmp = strtok_r(linearr[0], "/", &save_ptr);
tmp = strtok_r(NULL, "/", &save_ptr);
- slave = strtok_r(tmp, ":", &save_ptr);
+ slave = NULL;
+ if (tmp != NULL) {
+ slave = strtok_r(tmp, ":", &save_ptr);
+ }
if (slave) {
ret = glusterd_geo_rep_parse_slave(slave, hostname, op_errstr);
if (ret) {
@@ -6203,26 +6284,28 @@ create_conf_file(glusterd_conf_t *conf, char *conf_path)
/* log-file */
runinit_gsyncd_setrx(&runner, conf_path);
- runner_add_args(&runner, "log-file",
- DEFAULT_LOG_FILE_DIRECTORY "/" GEOREP
- "/${mastervol}/${eSlave}.log",
- ".", ".", NULL);
+ runner_add_arg(&runner, "log-file");
+ runner_argprintf(&runner, "%s/%s/${mastervol}/${eSlave}.log", conf->logdir,
+ GEOREP);
+ runner_add_args(&runner, ".", ".", NULL);
RUN_GSYNCD_CMD;
/* changelog-log-file */
runinit_gsyncd_setrx(&runner, conf_path);
- runner_add_args(&runner, "changelog-log-file",
- DEFAULT_LOG_FILE_DIRECTORY
- "/" GEOREP "/${mastervol}/${eSlave}${local_id}-changes.log",
- ".", ".", NULL);
+ runner_add_arg(&runner, "changelog-log-file");
+ runner_argprintf(&runner,
+ "%s/%s/${mastervol}/${eSlave}${local_id}-changes.log",
+ conf->logdir, GEOREP);
+ runner_add_args(&runner, ".", ".", NULL);
RUN_GSYNCD_CMD;
/* gluster-log-file */
runinit_gsyncd_setrx(&runner, conf_path);
- runner_add_args(&runner, "gluster-log-file",
- DEFAULT_LOG_FILE_DIRECTORY
- "/" GEOREP "/${mastervol}/${eSlave}${local_id}.gluster.log",
- ".", ".", NULL);
+ runner_add_arg(&runner, "gluster-log-file");
+ runner_argprintf(&runner,
+ "%s/%s/${mastervol}/${eSlave}${local_id}.gluster.log",
+ conf->logdir, GEOREP);
+ runner_add_args(&runner, ".", ".", NULL);
RUN_GSYNCD_CMD;
/* ignore-deletes */
@@ -6264,33 +6347,35 @@ create_conf_file(glusterd_conf_t *conf, char *conf_path)
/* log-file */
runinit_gsyncd_setrx(&runner, conf_path);
- runner_add_args(
- &runner, "log-file",
- DEFAULT_LOG_FILE_DIRECTORY
- "/" GEOREP
- "-slaves/${session_owner}:${local_node}${local_id}.${slavevol}.log",
- ".", NULL);
+ runner_add_arg(&runner, "log-file");
+ runner_argprintf(&runner,
+ "%s/%s-slaves/"
+ "${session_owner}:${local_node}${local_id}.${slavevol}."
+ "log",
+ conf->logdir, GEOREP);
+ runner_add_args(&runner, ".", ".", NULL);
RUN_GSYNCD_CMD;
/* MountBroker log-file */
runinit_gsyncd_setrx(&runner, conf_path);
- runner_add_args(
- &runner, "log-file-mbr",
- DEFAULT_LOG_FILE_DIRECTORY
- "/" GEOREP
- "-slaves/mbr/${session_owner}:${local_node}${local_id}.${slavevol}.log",
- ".", NULL);
+ runner_add_arg(&runner, "log-file-mbr");
+ runner_argprintf(&runner,
+ "%s/%s-slaves/mbr/"
+ "${session_owner}:${local_node}${local_id}.${slavevol}."
+ "log",
+ conf->logdir, GEOREP);
+ runner_add_args(&runner, ".", ".", NULL);
RUN_GSYNCD_CMD;
/* gluster-log-file */
runinit_gsyncd_setrx(&runner, conf_path);
- runner_add_args(
- &runner, "gluster-log-file",
- DEFAULT_LOG_FILE_DIRECTORY
- "/" GEOREP
- "-slaves/"
- "${session_owner}:${local_node}${local_id}.${slavevol}.gluster.log",
- ".", NULL);
+ runner_add_arg(&runner, "gluster-log-file");
+ runner_argprintf(&runner,
+ "%s/%s-slaves/"
+ "${session_owner}:${local_node}${local_id}.${slavevol}."
+ "gluster.log",
+ conf->logdir, GEOREP);
+ runner_add_args(&runner, ".", ".", NULL);
RUN_GSYNCD_CMD;
out:
@@ -6341,7 +6426,7 @@ glusterd_create_essential_dir_files(glusterd_volinfo_t *volinfo, dict_t *dict,
ret = -1;
goto out;
}
- ret = mkdir_p(buf, 0777, _gf_true);
+ ret = mkdir_p(buf, 0755, _gf_true);
if (ret) {
len = snprintf(errmsg, sizeof(errmsg),
"Unable to create %s"
@@ -6356,13 +6441,13 @@ glusterd_create_essential_dir_files(glusterd_volinfo_t *volinfo, dict_t *dict,
goto out;
}
- ret = snprintf(buf, PATH_MAX, DEFAULT_LOG_FILE_DIRECTORY "/" GEOREP "/%s",
+ ret = snprintf(buf, PATH_MAX, "%s/" GEOREP "/%s", conf->logdir,
volinfo->volname);
if ((ret < 0) || (ret >= PATH_MAX)) {
ret = -1;
goto out;
}
- ret = mkdir_p(buf, 0777, _gf_true);
+ ret = mkdir_p(buf, 0755, _gf_true);
if (ret) {
len = snprintf(errmsg, sizeof(errmsg),
"Unable to create %s"
diff --git a/xlators/mgmt/glusterd/src/glusterd-geo-rep.h b/xlators/mgmt/glusterd/src/glusterd-geo-rep.h
index 5f5fe344406..7d1318f522c 100644
--- a/xlators/mgmt/glusterd/src/glusterd-geo-rep.h
+++ b/xlators/mgmt/glusterd/src/glusterd-geo-rep.h
@@ -30,8 +30,8 @@ typedef struct glusterd_gsync_status_temp {
} glusterd_gsync_status_temp_t;
typedef struct gsync_status_param {
- int is_active;
glusterd_volinfo_t *volinfo;
+ int is_active;
} gsync_status_param_t;
int
diff --git a/xlators/mgmt/glusterd/src/glusterd-gfproxyd-svc-helper.c b/xlators/mgmt/glusterd/src/glusterd-gfproxyd-svc-helper.c
index 0b56a0eb45a..319bfa140f3 100644
--- a/xlators/mgmt/glusterd/src/glusterd-gfproxyd-svc-helper.c
+++ b/xlators/mgmt/glusterd/src/glusterd-gfproxyd-svc-helper.c
@@ -12,7 +12,7 @@
#include "glusterd-utils.h"
#include "glusterd-gfproxyd-svc-helper.h"
#include "glusterd-messages.h"
-#include "syscall.h"
+#include <glusterfs/syscall.h>
#include "glusterd-volgen.h"
void
@@ -81,7 +81,8 @@ glusterd_svc_build_gfproxyd_volfile_path(glusterd_volinfo_t *volinfo,
void
glusterd_svc_build_gfproxyd_logdir(char *logdir, char *volname, size_t len)
{
- snprintf(logdir, len, "%s/gfproxy/%s", DEFAULT_LOG_FILE_DIRECTORY, volname);
+ glusterd_conf_t *conf = THIS->private;
+ snprintf(logdir, len, "%s/gfproxy/%s", conf->logdir, volname);
}
void
@@ -111,7 +112,7 @@ glusterd_svc_get_gfproxyd_volfile(glusterd_volinfo_t *volinfo, char *svc_name,
goto out;
}
- /* coverity[secure_temp] mkstemp uses 0600 as the mode and is safe */
+ /* coverity[SECURE_TEMP] mkstemp uses 0600 as the mode and is safe */
tmp_fd = mkstemp(*tmpvol);
if (tmp_fd < 0) {
gf_msg("glusterd", GF_LOG_WARNING, errno, GD_MSG_FILE_OP_FAILED,
diff --git a/xlators/mgmt/glusterd/src/glusterd-gfproxyd-svc.c b/xlators/mgmt/glusterd/src/glusterd-gfproxyd-svc.c
index 0a78d4d1fd0..a0bfea41f0f 100644
--- a/xlators/mgmt/glusterd/src/glusterd-gfproxyd-svc.c
+++ b/xlators/mgmt/glusterd/src/glusterd-gfproxyd-svc.c
@@ -8,8 +8,8 @@
cases as published by the Free Software Foundation.
*/
-#include "globals.h"
-#include "run.h"
+#include <glusterfs/globals.h>
+#include <glusterfs/run.h>
#include "glusterd.h"
#include "glusterd-utils.h"
#include "glusterd-volgen.h"
@@ -18,7 +18,7 @@
#include "glusterd-svc-helper.h"
#include "glusterd-svc-mgmt.h"
#include "glusterd-gfproxyd-svc-helper.h"
-#include "syscall.h"
+#include <glusterfs/syscall.h>
void
glusterd_gfproxydsvc_build(glusterd_svc_t *svc)
@@ -310,7 +310,7 @@ glusterd_gfproxydsvc_start(glusterd_svc_t *svc, int flags)
}
runinit(&runner);
- if (this->ctx->cmd_args.valgrind) {
+ if (this->ctx->cmd_args.vgtool != _gf_none) {
len = snprintf(valgrind_logfile, PATH_MAX, "%s/valgrind-%s",
svc->proc.logdir, svc->proc.logfile);
if ((len < 0) || (len >= PATH_MAX)) {
@@ -318,8 +318,13 @@ glusterd_gfproxydsvc_start(glusterd_svc_t *svc, int flags)
goto out;
}
- runner_add_args(&runner, "valgrind", "--leak-check=full",
- "--trace-children=yes", "--track-origins=yes", NULL);
+ if (this->ctx->cmd_args.vgtool == _gf_memcheck)
+ runner_add_args(&runner, "valgrind", "--leak-check=full",
+ "--trace-children=yes", "--track-origins=yes",
+ NULL);
+ else
+ runner_add_args(&runner, "valgrind", "--tool=drd", NULL);
+
runner_argprintf(&runner, "--log-file=%s", valgrind_logfile);
}
@@ -370,6 +375,7 @@ int
glusterd_gfproxydsvc_restart()
{
glusterd_volinfo_t *volinfo = NULL;
+ glusterd_volinfo_t *tmp = NULL;
int ret = -1;
xlator_t *this = THIS;
glusterd_conf_t *conf = NULL;
@@ -380,7 +386,7 @@ glusterd_gfproxydsvc_restart()
conf = this->private;
GF_VALIDATE_OR_GOTO(this->name, conf, out);
- cds_list_for_each_entry(volinfo, &conf->volumes, vol_list)
+ cds_list_for_each_entry_safe(volinfo, tmp, &conf->volumes, vol_list)
{
/* Start per volume gfproxyd svc */
if (volinfo->status == GLUSTERD_STATUS_STARTED) {
diff --git a/xlators/mgmt/glusterd/src/glusterd-gfproxyd-svc.h b/xlators/mgmt/glusterd/src/glusterd-gfproxyd-svc.h
index db1c8b1e7b0..d396b4015f3 100644
--- a/xlators/mgmt/glusterd/src/glusterd-gfproxyd-svc.h
+++ b/xlators/mgmt/glusterd/src/glusterd-gfproxyd-svc.h
@@ -17,8 +17,8 @@
struct glusterd_gfproxydsvc_ {
glusterd_svc_t svc;
- int port;
gf_store_handle_t *handle;
+ int port;
};
typedef struct glusterd_gfproxydsvc_ glusterd_gfproxydsvc_t;
diff --git a/xlators/mgmt/glusterd/src/glusterd-handler.c b/xlators/mgmt/glusterd/src/glusterd-handler.c
index ef1df3c3788..1b21c40596d 100644
--- a/xlators/mgmt/glusterd/src/glusterd-handler.c
+++ b/xlators/mgmt/glusterd/src/glusterd-handler.c
@@ -9,24 +9,25 @@
*/
#include <inttypes.h>
-#include "glusterfs.h"
-#include "compat.h"
-#include "dict.h"
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/compat.h>
+#include <glusterfs/dict.h>
#include "protocol-common.h"
-#include "xlator.h"
-#include "logging.h"
-#include "syscall.h"
-#include "timer.h"
-#include "defaults.h"
-#include "compat.h"
-#include "compat-errno.h"
-#include "statedump.h"
-#include "run.h"
+#include <glusterfs/xlator.h>
+#include <glusterfs/logging.h>
+#include <glusterfs/syscall.h>
+#include <glusterfs/timer.h>
+#include <glusterfs/defaults.h>
+#include <glusterfs/compat.h>
+#include <glusterfs/compat-errno.h>
+#include <glusterfs/statedump.h>
+#include <glusterfs/run.h>
#include "glusterd-mem-types.h"
#include "glusterd.h"
#include "glusterd-sm.h"
#include "glusterd-op-sm.h"
#include "glusterd-utils.h"
+#include "glusterd-mgmt.h"
#include "glusterd-server-quorum.h"
#include "glusterd-store.h"
#include "glusterd-locks.h"
@@ -45,15 +46,11 @@
#include <sys/resource.h>
#include <inttypes.h>
-#include "common-utils.h"
+#include <glusterfs/common-utils.h>
#include "glusterd-syncop.h"
#include "glusterd-messages.h"
-#ifdef HAVE_BD_XLATOR
-#include <lvm2app.h>
-#endif
-
extern glusterd_op_info_t opinfo;
static int volcount;
@@ -94,16 +91,17 @@ glusterd_handle_friend_req(rpcsvc_request_t *req, uuid_t uuid, char *hostname,
glusterd_friend_sm_event_t *event = NULL;
glusterd_friend_req_ctx_t *ctx = NULL;
char rhost[UNIX_PATH_MAX + 1] = {0};
- uuid_t friend_uuid = {0};
dict_t *dict = NULL;
- gf_uuid_parse(uuid_utoa(uuid), friend_uuid);
if (!port)
port = GF_DEFAULT_BASE_PORT;
ret = glusterd_remote_hostname_get(req, rhost, sizeof(rhost));
- rcu_read_lock();
+ ctx = GF_CALLOC(1, sizeof(*ctx), gf_gld_mt_friend_req_ctx_t);
+ dict = dict_new();
+
+ RCU_READ_LOCK;
peerinfo = glusterd_peerinfo_find(uuid, rhost);
@@ -129,8 +127,6 @@ glusterd_handle_friend_req(rpcsvc_request_t *req, uuid_t uuid, char *hostname,
event->peername = gf_strdup(peerinfo->hostname);
gf_uuid_copy(event->peerid, peerinfo->uuid);
- ctx = GF_CALLOC(1, sizeof(*ctx), gf_gld_mt_friend_req_ctx_t);
-
if (!ctx) {
gf_msg("glusterd", GF_LOG_ERROR, ENOMEM, GD_MSG_NO_MEMORY,
"Unable to allocate memory");
@@ -143,8 +139,8 @@ glusterd_handle_friend_req(rpcsvc_request_t *req, uuid_t uuid, char *hostname,
ctx->hostname = gf_strdup(hostname);
ctx->req = req;
- dict = dict_new();
if (!dict) {
+ gf_smsg("glusterd", GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL);
ret = -1;
goto out;
}
@@ -152,9 +148,11 @@ glusterd_handle_friend_req(rpcsvc_request_t *req, uuid_t uuid, char *hostname,
ret = dict_unserialize(friend_req->vols.vols_val, friend_req->vols.vols_len,
&dict);
- if (ret)
+ if (ret) {
+ gf_smsg("glusterd", GF_LOG_ERROR, 0, GD_MSG_DICT_UNSERIALIZE_FAIL,
+ NULL);
goto out;
- else
+ } else
dict->extra_stdfree = friend_req->vols.vols_val;
ctx->vols = dict;
@@ -174,7 +172,7 @@ glusterd_handle_friend_req(rpcsvc_request_t *req, uuid_t uuid, char *hostname,
ret = GLUSTERD_CONNECTION_AWAITED;
out:
- rcu_read_unlock();
+ RCU_READ_UNLOCK;
if (ret && (ret != GLUSTERD_CONNECTION_AWAITED)) {
if (ctx && ctx->hostname)
@@ -207,11 +205,14 @@ glusterd_handle_unfriend_req(rpcsvc_request_t *req, uuid_t uuid, char *hostname,
if (!port)
port = GF_DEFAULT_BASE_PORT;
- rcu_read_lock();
+ ctx = GF_CALLOC(1, sizeof(*ctx), gf_gld_mt_friend_req_ctx_t);
+
+ RCU_READ_LOCK;
peerinfo = glusterd_peerinfo_find(uuid, hostname);
if (peerinfo == NULL) {
+ RCU_READ_UNLOCK;
gf_msg("glusterd", GF_LOG_CRITICAL, 0, GD_MSG_REQ_FROM_UNKNOWN_PEER,
"Received remove-friend from unknown peer %s", hostname);
ret = glusterd_xfer_friend_remove_resp(req, hostname, port);
@@ -222,6 +223,7 @@ glusterd_handle_unfriend_req(rpcsvc_request_t *req, uuid_t uuid, char *hostname,
&event);
if (ret) {
+ RCU_READ_UNLOCK;
gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_EVENT_NEW_GET_FAIL,
"event generation failed: %d", ret);
goto out;
@@ -232,12 +234,11 @@ glusterd_handle_unfriend_req(rpcsvc_request_t *req, uuid_t uuid, char *hostname,
gf_uuid_copy(event->peerid, uuid);
- ctx = GF_CALLOC(1, sizeof(*ctx), gf_gld_mt_friend_req_ctx_t);
-
if (!ctx) {
+ RCU_READ_UNLOCK;
+ ret = -1;
gf_msg("glusterd", GF_LOG_ERROR, ENOMEM, GD_MSG_NO_MEMORY,
"Unable to allocate memory");
- ret = -1;
goto out;
}
@@ -251,6 +252,7 @@ glusterd_handle_unfriend_req(rpcsvc_request_t *req, uuid_t uuid, char *hostname,
ret = glusterd_friend_sm_inject_event(event);
if (ret) {
+ RCU_READ_UNLOCK;
gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_EVENT_INJECT_FAIL,
"Unable to inject event %d, "
"ret = %d",
@@ -258,10 +260,11 @@ glusterd_handle_unfriend_req(rpcsvc_request_t *req, uuid_t uuid, char *hostname,
goto out;
}
- ret = 0;
+ RCU_READ_UNLOCK;
+
+ return 0;
out:
- rcu_read_unlock();
if (0 != ret) {
if (ctx && ctx->hostname)
@@ -326,81 +329,6 @@ _build_option_key(dict_t *d, char *k, data_t *v, void *tmp)
}
int
-glusterd_add_tier_volume_detail_to_dict(glusterd_volinfo_t *volinfo,
- dict_t *dict, int count)
-{
- int ret = -1;
- char key[64] = {
- 0,
- };
- int keylen;
-
- GF_ASSERT(volinfo);
- GF_ASSERT(dict);
-
- keylen = snprintf(key, sizeof(key), "volume%d.cold_type", count);
- ret = dict_set_int32n(dict, key, keylen, volinfo->tier_info.cold_type);
- if (ret)
- goto out;
-
- keylen = snprintf(key, sizeof(key), "volume%d.cold_brick_count", count);
- ret = dict_set_int32n(dict, key, keylen,
- volinfo->tier_info.cold_brick_count);
- if (ret)
- goto out;
-
- keylen = snprintf(key, sizeof(key), "volume%d.cold_dist_count", count);
- ret = dict_set_int32n(dict, key, keylen,
- volinfo->tier_info.cold_dist_leaf_count);
- if (ret)
- goto out;
-
- keylen = snprintf(key, sizeof(key), "volume%d.cold_replica_count", count);
- ret = dict_set_int32n(dict, key, keylen,
- volinfo->tier_info.cold_replica_count);
- if (ret)
- goto out;
-
- keylen = snprintf(key, sizeof(key), "volume%d.cold_arbiter_count", count);
- ret = dict_set_int32n(dict, key, keylen, volinfo->arbiter_count);
- if (ret)
- goto out;
-
- keylen = snprintf(key, sizeof(key), "volume%d.cold_disperse_count", count);
- ret = dict_set_int32n(dict, key, keylen,
- volinfo->tier_info.cold_disperse_count);
- if (ret)
- goto out;
-
- keylen = snprintf(key, sizeof(key), "volume%d.cold_redundancy_count",
- count);
- ret = dict_set_int32n(dict, key, keylen,
- volinfo->tier_info.cold_redundancy_count);
- if (ret)
- goto out;
-
- keylen = snprintf(key, sizeof(key), "volume%d.hot_type", count);
- ret = dict_set_int32n(dict, key, keylen, volinfo->tier_info.hot_type);
- if (ret)
- goto out;
-
- keylen = snprintf(key, sizeof(key), "volume%d.hot_brick_count", count);
- ret = dict_set_int32n(dict, key, keylen,
- volinfo->tier_info.hot_brick_count);
- if (ret)
- goto out;
-
- keylen = snprintf(key, sizeof(key), "volume%d.hot_replica_count", count);
- ret = dict_set_int32n(dict, key, keylen,
- volinfo->tier_info.hot_replica_count);
- if (ret)
- goto out;
-
-out:
- return ret;
-}
-
-int
glusterd_add_arbiter_info_to_bricks(glusterd_volinfo_t *volinfo,
dict_t *volumes, int count)
{
@@ -409,41 +337,18 @@ glusterd_add_arbiter_info_to_bricks(glusterd_volinfo_t *volinfo,
};
int keylen;
int i = 0;
- int start_index = 0;
int ret = 0;
- if (volinfo->type == GF_CLUSTER_TYPE_TIER) {
- /*TODO: Add info for hot tier once attach tier of arbiter
- * volumes is supported. */
-
- /* cold tier */
- if (volinfo->tier_info.cold_replica_count == 1 ||
- volinfo->arbiter_count != 1)
- return 0;
-
- i = start_index = volinfo->tier_info.hot_brick_count + 1;
- for (; i <= volinfo->brick_count; i++) {
- if ((i - start_index + 1) % volinfo->tier_info.cold_replica_count !=
- 0)
- continue;
- keylen = snprintf(key, sizeof(key), "volume%d.brick%d.isArbiter",
- count, i);
- ret = dict_set_int32n(volumes, key, keylen, 1);
- if (ret)
- return ret;
- }
- } else {
- if (volinfo->replica_count == 1 || volinfo->arbiter_count != 1)
- return 0;
- for (i = 1; i <= volinfo->brick_count; i++) {
- if (i % volinfo->replica_count != 0)
- continue;
- keylen = snprintf(key, sizeof(key), "volume%d.brick%d.isArbiter",
- count, i);
- ret = dict_set_int32n(volumes, key, keylen, 1);
- if (ret)
- return ret;
- }
+ if (volinfo->replica_count == 1 || volinfo->arbiter_count != 1)
+ return 0;
+ for (i = 1; i <= volinfo->brick_count; i++) {
+ if (i % volinfo->replica_count != 0)
+ continue;
+ keylen = snprintf(key, sizeof(key), "volume%d.brick%d.isArbiter", count,
+ i);
+ ret = dict_set_int32n(volumes, key, keylen, 1);
+ if (ret)
+ return ret;
}
return 0;
}
@@ -458,6 +363,7 @@ glusterd_add_volume_detail_to_dict(glusterd_volinfo_t *volinfo, dict_t *volumes,
};
int keylen;
glusterd_brickinfo_t *brickinfo = NULL;
+ glusterd_brickinfo_t *ta_brickinfo = NULL;
char *buf = NULL;
int i = 1;
dict_t *dict = NULL;
@@ -467,9 +373,12 @@ glusterd_add_volume_detail_to_dict(glusterd_volinfo_t *volinfo, dict_t *volumes,
0,
};
xlator_t *this = NULL;
- GF_UNUSED int caps = 0;
int32_t len = 0;
+ char ta_brick[4096] = {
+ 0,
+ };
+
GF_ASSERT(volinfo);
GF_ASSERT(volumes);
@@ -480,172 +389,129 @@ glusterd_add_volume_detail_to_dict(glusterd_volinfo_t *volinfo, dict_t *volumes,
keylen = snprintf(key, sizeof(key), "volume%d.name", count);
ret = dict_set_strn(volumes, key, keylen, volinfo->volname);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=%s", key, NULL);
goto out;
+ }
keylen = snprintf(key, sizeof(key), "volume%d.type", count);
ret = dict_set_int32n(volumes, key, keylen, volinfo->type);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=%s", key, NULL);
goto out;
+ }
keylen = snprintf(key, sizeof(key), "volume%d.status", count);
ret = dict_set_int32n(volumes, key, keylen, volinfo->status);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=%s", key, NULL);
goto out;
+ }
keylen = snprintf(key, sizeof(key), "volume%d.brick_count", count);
ret = dict_set_int32n(volumes, key, keylen, volinfo->brick_count);
- if (ret)
- goto out;
-
- keylen = snprintf(key, sizeof(key), "volume%d.hot_brick_count", count);
- ret = dict_set_int32n(volumes, key, keylen,
- volinfo->tier_info.hot_brick_count);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=%s", key, NULL);
goto out;
-
- if (volinfo->type == GF_CLUSTER_TYPE_TIER) {
- ret = glusterd_add_tier_volume_detail_to_dict(volinfo, volumes, count);
- if (ret)
- goto out;
}
keylen = snprintf(key, sizeof(key), "volume%d.dist_count", count);
ret = dict_set_int32n(volumes, key, keylen, volinfo->dist_leaf_count);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=%s", key, NULL);
goto out;
+ }
keylen = snprintf(key, sizeof(key), "volume%d.stripe_count", count);
ret = dict_set_int32n(volumes, key, keylen, volinfo->stripe_count);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=%s", key, NULL);
goto out;
+ }
keylen = snprintf(key, sizeof(key), "volume%d.replica_count", count);
ret = dict_set_int32n(volumes, key, keylen, volinfo->replica_count);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=%s", key, NULL);
goto out;
+ }
keylen = snprintf(key, sizeof(key), "volume%d.disperse_count", count);
ret = dict_set_int32n(volumes, key, keylen, volinfo->disperse_count);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=%s", key, NULL);
goto out;
+ }
keylen = snprintf(key, sizeof(key), "volume%d.redundancy_count", count);
ret = dict_set_int32n(volumes, key, keylen, volinfo->redundancy_count);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=%s", key, NULL);
goto out;
+ }
keylen = snprintf(key, sizeof(key), "volume%d.arbiter_count", count);
ret = dict_set_int32n(volumes, key, keylen, volinfo->arbiter_count);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=%s", key, NULL);
goto out;
+ }
keylen = snprintf(key, sizeof(key), "volume%d.transport", count);
ret = dict_set_int32n(volumes, key, keylen, volinfo->transport_type);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=%s", key, NULL);
goto out;
+ }
+
+ keylen = snprintf(key, sizeof(key), "volume%d.thin_arbiter_count", count);
+ ret = dict_set_int32n(volumes, key, keylen, volinfo->thin_arbiter_count);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=%s", key, NULL);
+ goto out;
+ }
volume_id_str = gf_strdup(uuid_utoa(volinfo->volume_id));
- if (!volume_id_str)
+ if (!volume_id_str) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_STRDUP_FAILED, NULL);
goto out;
+ }
keylen = snprintf(key, sizeof(key), "volume%d.volume_id", count);
ret = dict_set_dynstrn(volumes, key, keylen, volume_id_str);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=%s", key, NULL);
goto out;
+ }
keylen = snprintf(key, sizeof(key), "volume%d.rebalance", count);
ret = dict_set_int32n(volumes, key, keylen, volinfo->rebal.defrag_cmd);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=%s", key, NULL);
goto out;
+ }
keylen = snprintf(key, sizeof(key), "volume%d.snap_count", count);
ret = dict_set_int32n(volumes, key, keylen, volinfo->snap_count);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=%s", key, NULL);
goto out;
-
-#ifdef HAVE_BD_XLATOR
- if (volinfo->caps) {
- caps = 0;
- keylen = snprintf(key, sizeof(key), "volume%d.xlator0", count);
- buf = GF_MALLOC(256, gf_common_mt_char);
- if (!buf) {
- ret = ENOMEM;
- goto out;
- }
- if (volinfo->caps & CAPS_BD)
- snprintf(buf, 256, "BD");
- ret = dict_set_dynstrn(volumes, key, keylen, buf);
- if (ret) {
- GF_FREE(buf);
- goto out;
- }
-
- if (volinfo->caps & CAPS_THIN) {
- snprintf(key, sizeof(key), "volume%d.xlator0.caps%d", count,
- caps++);
- buf = GF_MALLOC(256, gf_common_mt_char);
- if (!buf) {
- ret = ENOMEM;
- goto out;
- }
- snprintf(buf, 256, "thin");
- ret = dict_set_dynstr(volumes, key, buf);
- if (ret) {
- GF_FREE(buf);
- goto out;
- }
- }
-
- if (volinfo->caps & CAPS_OFFLOAD_COPY) {
- snprintf(key, sizeof(key), "volume%d.xlator0.caps%d", count,
- caps++);
- buf = GF_MALLOC(256, gf_common_mt_char);
- if (!buf) {
- ret = ENOMEM;
- goto out;
- }
- snprintf(buf, 256, "offload_copy");
- ret = dict_set_dynstr(volumes, key, buf);
- if (ret) {
- GF_FREE(buf);
- goto out;
- }
- }
-
- if (volinfo->caps & CAPS_OFFLOAD_SNAPSHOT) {
- snprintf(key, sizeof(key), "volume%d.xlator0.caps%d", count,
- caps++);
- buf = GF_MALLOC(256, gf_common_mt_char);
- if (!buf) {
- ret = ENOMEM;
- goto out;
- }
- snprintf(buf, 256, "offload_snapshot");
- ret = dict_set_dynstr(volumes, key, buf);
- if (ret) {
- GF_FREE(buf);
- goto out;
- }
- }
-
- if (volinfo->caps & CAPS_OFFLOAD_ZERO) {
- snprintf(key, sizeof(key), "volume%d.xlator0.caps%d", count,
- caps++);
- buf = GF_MALLOC(256, gf_common_mt_char);
- if (!buf) {
- ret = ENOMEM;
- goto out;
- }
- snprintf(buf, 256, "offload_zerofill");
- ret = dict_set_dynstr(volumes, key, buf);
- if (ret) {
- GF_FREE(buf);
- goto out;
- }
- }
}
-#endif
cds_list_for_each_entry(brickinfo, &volinfo->bricks, brick_list)
{
@@ -658,42 +524,67 @@ glusterd_add_volume_detail_to_dict(glusterd_volinfo_t *volinfo, dict_t *volumes,
len = snprintf(brick, sizeof(brick), "%s:%s", brickinfo->hostname,
brickinfo->path);
if ((len < 0) || (len >= sizeof(brick))) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_COPY_FAIL, NULL);
ret = -1;
goto out;
}
buf = gf_strdup(brick);
keylen = snprintf(key, sizeof(key), "volume%d.brick%d", count, i);
ret = dict_set_dynstrn(volumes, key, keylen, buf);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=%s", key, NULL);
goto out;
+ }
keylen = snprintf(key, sizeof(key), "volume%d.brick%d.uuid", count, i);
snprintf(brick_uuid, sizeof(brick_uuid), "%s",
uuid_utoa(brickinfo->uuid));
buf = gf_strdup(brick_uuid);
- if (!buf)
+ if (!buf) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_STRDUP_FAILED,
+ "brick_uuid=%s", brick_uuid, NULL);
goto out;
+ }
ret = dict_set_dynstrn(volumes, key, keylen, buf);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=%s", key, NULL);
goto out;
-
-#ifdef HAVE_BD_XLATOR
- if (volinfo->caps & CAPS_BD) {
- snprintf(key, sizeof(key), "volume%d.vg%d", count, i);
- snprintf(brick, sizeof(brick), "%s", brickinfo->vg);
- buf = gf_strdup(brick);
- ret = dict_set_dynstr(volumes, key, buf);
- if (ret)
- goto out;
}
-#endif
+
i++;
}
+ if (volinfo->thin_arbiter_count == 1) {
+ ta_brickinfo = list_first_entry(&volinfo->ta_bricks,
+ glusterd_brickinfo_t, brick_list);
+ len = snprintf(ta_brick, sizeof(ta_brick), "%s:%s",
+ ta_brickinfo->hostname, ta_brickinfo->path);
+ if ((len < 0) || (len >= sizeof(ta_brick))) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_COPY_FAIL, NULL);
+ ret = -1;
+ goto out;
+ }
+ buf = gf_strdup(ta_brick);
+ keylen = snprintf(key, sizeof(key), "volume%d.thin_arbiter_brick",
+ count);
+ ret = dict_set_dynstrn(volumes, key, keylen, buf);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=%s", key, NULL);
+ goto out;
+ }
+ }
+
ret = glusterd_add_arbiter_info_to_bricks(volinfo, volumes, count);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ GD_MSG_ARBITER_BRICK_SET_INFO_FAIL, NULL);
goto out;
+ }
dict = volinfo->dict;
if (!dict) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL);
ret = 0;
goto out;
}
@@ -904,9 +795,9 @@ __glusterd_handle_cluster_lock(rpcsvc_request_t *req)
gf_msg_debug(this->name, 0, "Received LOCK from uuid: %s",
uuid_utoa(lock_req.uuid));
- rcu_read_lock();
+ RCU_READ_LOCK;
ret = (glusterd_peerinfo_find_by_uuid(lock_req.uuid) == NULL);
- rcu_read_unlock();
+ RCU_READ_UNLOCK;
if (ret) {
gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_PEER_NOT_FOUND,
"%s doesn't "
@@ -929,6 +820,7 @@ __glusterd_handle_cluster_lock(rpcsvc_request_t *req)
op_ctx = dict_new();
if (!op_ctx) {
+ ret = -1;
gf_msg(this->name, GF_LOG_ERROR, ENOMEM, GD_MSG_DICT_CREATE_FAIL,
"Unable to set new dict");
goto out;
@@ -955,6 +847,9 @@ out:
glusterd_friend_sm();
glusterd_op_sm();
+ if (ret)
+ GF_FREE(ctx);
+
return ret;
}
@@ -985,11 +880,14 @@ glusterd_req_ctx_create(rpcsvc_request_t *rpc_req, int op, uuid_t uuid,
gf_msg_debug(this->name, 0, "Received op from uuid %s", str);
dict = dict_new();
- if (!dict)
+ if (!dict) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL);
goto out;
+ }
req_ctx = GF_CALLOC(1, sizeof(*req_ctx), mem_type);
if (!req_ctx) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_NO_MEMORY, NULL);
goto out;
}
@@ -997,8 +895,8 @@ glusterd_req_ctx_create(rpcsvc_request_t *rpc_req, int op, uuid_t uuid,
req_ctx->op = op;
ret = dict_unserialize(buf_val, buf_len, &dict);
if (ret) {
- gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_DICT_UNSERIALIZE_FAIL,
- "failed to unserialize the dictionary");
+ gf_smsg(this->name, GF_LOG_WARNING, 0, GD_MSG_DICT_UNSERIALIZE_FAIL,
+ NULL);
goto out;
}
@@ -1063,9 +961,9 @@ __glusterd_handle_stage_op(rpcsvc_request_t *req)
ret = dict_get_bin(req_ctx->dict, "transaction_id", (void **)&txn_id);
gf_msg_debug(this->name, 0, "transaction ID = %s", uuid_utoa(*txn_id));
- rcu_read_lock();
+ RCU_READ_LOCK;
ret = (glusterd_peerinfo_find_by_uuid(op_req.uuid) == NULL);
- rcu_read_unlock();
+ RCU_READ_UNLOCK;
if (ret) {
gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_PEER_NOT_FOUND,
"%s doesn't "
@@ -1077,7 +975,11 @@ __glusterd_handle_stage_op(rpcsvc_request_t *req)
/* In cases where there is no volname, the receivers won't have a
* transaction opinfo created, as for those operations, the locking
- * phase where the transaction opinfos are created, won't be called. */
+ * phase where the transaction opinfos are created, won't be called.
+ * skip_locking will be true for all such transaction and we clear
+ * the txn_opinfo after the staging phase, except for geo-replication
+ * operations where we need to access txn_opinfo in the later phases also.
+ */
ret = glusterd_get_txn_opinfo(txn_id, &txn_op_info);
if (ret) {
gf_msg_debug(this->name, 0, "No transaction's opinfo set");
@@ -1086,7 +988,8 @@ __glusterd_handle_stage_op(rpcsvc_request_t *req)
glusterd_txn_opinfo_init(&txn_op_info, &state, &op_req.op,
req_ctx->dict, req);
- txn_op_info.skip_locking = _gf_true;
+ if (req_ctx->op != GD_OP_GSYNC_SET)
+ txn_op_info.skip_locking = _gf_true;
ret = glusterd_set_txn_opinfo(txn_id, &txn_op_info);
if (ret) {
gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_TRANS_OPINFO_SET_FAIL,
@@ -1144,9 +1047,9 @@ __glusterd_handle_commit_op(rpcsvc_request_t *req)
goto out;
}
- rcu_read_lock();
+ RCU_READ_LOCK;
ret = (glusterd_peerinfo_find_by_uuid(op_req.uuid) == NULL);
- rcu_read_unlock();
+ RCU_READ_UNLOCK;
if (ret) {
gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_PEER_NOT_FOUND,
"%s doesn't "
@@ -1267,12 +1170,12 @@ __glusterd_handle_cli_probe(rpcsvc_request_t *req)
goto out;
}
- rcu_read_lock();
+ RCU_READ_LOCK;
peerinfo = glusterd_peerinfo_find_by_hostname(hostname);
ret = (peerinfo && gd_peer_has_address(peerinfo, hostname));
- rcu_read_unlock();
+ RCU_READ_UNLOCK;
if (ret) {
gf_msg_debug("glusterd", 0,
@@ -1336,6 +1239,7 @@ __glusterd_handle_cli_deprobe(rpcsvc_request_t *req)
glusterd_volinfo_t *tmp = NULL;
glusterd_snap_t *snapinfo = NULL;
glusterd_snap_t *tmpsnap = NULL;
+ gf_boolean_t need_free = _gf_false;
this = THIS;
GF_ASSERT(this);
@@ -1356,6 +1260,13 @@ __glusterd_handle_cli_deprobe(rpcsvc_request_t *req)
if (cli_req.dict.dict_len) {
dict = dict_new();
+ if (dict) {
+ need_free = _gf_true;
+ } else {
+ ret = -1;
+ goto out;
+ }
+
ret = dict_unserialize(cli_req.dict.dict_val, cli_req.dict.dict_len,
&dict);
if (ret < 0) {
@@ -1451,12 +1362,17 @@ __glusterd_handle_cli_deprobe(rpcsvc_request_t *req)
&op_errno);
}
+ need_free = _gf_false;
+
out:
free(cli_req.dict.dict_val);
if (ret) {
ret = glusterd_xfer_cli_deprobe_resp(req, ret, op_errno, NULL, hostname,
dict);
+ if (need_free) {
+ dict_unref(dict);
+ }
}
glusterd_friend_sm();
@@ -1554,7 +1470,7 @@ __glusterd_handle_cli_get_volume(rpcsvc_request_t *req)
goto out;
}
- gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_GET_VOL_REQ_RCVD,
+ gf_msg(this->name, GF_LOG_DEBUG, 0, GD_MSG_GET_VOL_REQ_RCVD,
"Received get vol req");
if (cli_req.dict.dict_len) {
@@ -1756,6 +1672,8 @@ __glusterd_handle_cli_uuid_get(rpcsvc_request_t *req)
if (cli_req.dict.dict_len) {
dict = dict_new();
if (!dict) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL,
+ NULL);
ret = -1;
goto out;
}
@@ -1778,6 +1696,7 @@ __glusterd_handle_cli_uuid_get(rpcsvc_request_t *req)
rsp_dict = dict_new();
if (!rsp_dict) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL);
ret = -1;
goto out;
}
@@ -1794,9 +1713,8 @@ __glusterd_handle_cli_uuid_get(rpcsvc_request_t *req)
ret = dict_allocate_and_serialize(rsp_dict, &rsp.dict.dict_val,
&rsp.dict.dict_len);
if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SERL_LENGTH_GET_FAIL,
- "Failed to serialize "
- "dictionary.");
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ GD_MSG_DICT_ALLOC_AND_SERL_LENGTH_GET_FAIL, NULL);
goto out;
}
ret = 0;
@@ -1815,6 +1733,10 @@ out:
glusterd_to_cli(req, &rsp, NULL, 0, NULL, (xdrproc_t)xdr_gf_cli_rsp, dict);
+ if (rsp_dict)
+ dict_unref(rsp_dict);
+ GF_FREE(rsp.dict.dict_val);
+
return 0;
}
int
@@ -1845,8 +1767,10 @@ __glusterd_handle_cli_list_volume(rpcsvc_request_t *req)
GF_ASSERT(priv);
dict = dict_new();
- if (!dict)
+ if (!dict) {
+ gf_smsg("glusterd", GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL);
goto out;
+ }
cds_list_for_each_entry(volinfo, &priv->volumes, vol_list)
{
@@ -1858,8 +1782,11 @@ __glusterd_handle_cli_list_volume(rpcsvc_request_t *req)
}
ret = dict_set_int32n(dict, "count", SLEN("count"), count);
- if (ret)
+ if (ret) {
+ gf_smsg("glusterd", GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=count", NULL);
goto out;
+ }
ret = dict_allocate_and_serialize(dict, &rsp.dict.dict_val,
&rsp.dict.dict_len);
@@ -1881,6 +1808,8 @@ out:
if (dict)
dict_unref(dict);
+ GF_FREE(rsp.dict.dict_val);
+
glusterd_friend_sm();
glusterd_op_sm();
@@ -1904,6 +1833,85 @@ glusterd_op_begin(rpcsvc_request_t *req, glusterd_op_t op, void *ctx,
return ret;
}
+int
+__glusterd_handle_ganesha_cmd(rpcsvc_request_t *req)
+{
+ int32_t ret = -1;
+ gf_cli_req cli_req = {{
+ 0,
+ }};
+ dict_t *dict = NULL;
+ glusterd_op_t cli_op = GD_OP_GANESHA;
+ char *op_errstr = NULL;
+ char err_str[2048] = {
+ 0,
+ };
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+
+ GF_ASSERT(req);
+
+ ret = xdr_to_generic(req->msg[0], &cli_req, (xdrproc_t)xdr_gf_cli_req);
+ if (ret < 0) {
+ snprintf(err_str, sizeof(err_str),
+ "Failed to decode "
+ "request received from cli");
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_REQ_DECODE_FAIL, "%s",
+ err_str);
+ req->rpc_err = GARBAGE_ARGS;
+ goto out;
+ }
+
+ if (cli_req.dict.dict_len) {
+ /* Unserialize the dictionary */
+ dict = dict_new();
+ if (!dict) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL,
+ NULL);
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_unserialize(cli_req.dict.dict_val, cli_req.dict.dict_len,
+ &dict);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_UNSERIALIZE_FAIL,
+ "failed to "
+ "unserialize req-buffer to dictionary");
+ snprintf(err_str, sizeof(err_str),
+ "Unable to decode "
+ "the command");
+ goto out;
+ } else {
+ dict->extra_stdfree = cli_req.dict.dict_val;
+ }
+ }
+
+ gf_msg_trace(this->name, 0, "Received global option request");
+
+ ret = glusterd_op_begin_synctask(req, GD_OP_GANESHA, dict);
+out:
+ if (ret) {
+ if (err_str[0] == '\0')
+ snprintf(err_str, sizeof(err_str), "Operation failed");
+ ret = glusterd_op_send_cli_response(cli_op, ret, 0, req, dict, err_str);
+ }
+ if (op_errstr)
+ GF_FREE(op_errstr);
+ if (dict)
+ dict_unref(dict);
+
+ return ret;
+}
+
+int
+glusterd_handle_ganesha_cmd(rpcsvc_request_t *req)
+{
+ return glusterd_big_locked_handler(req, __glusterd_handle_ganesha_cmd);
+}
+
static int
__glusterd_handle_reset_volume(rpcsvc_request_t *req)
{
@@ -2230,9 +2238,8 @@ glusterd_fsm_log_send_resp(rpcsvc_request_t *req, int op_ret, char *op_errstr,
ret = dict_allocate_and_serialize(dict, &rsp.fsm_log.fsm_log_val,
&rsp.fsm_log.fsm_log_len);
if (ret < 0) {
- gf_msg("glusterd", GF_LOG_ERROR, 0,
- GD_MSG_DICT_SERL_LENGTH_GET_FAIL,
- "failed to get serialized length of dict");
+ gf_smsg("glusterd", GF_LOG_ERROR, errno,
+ GD_MSG_DICT_ALLOC_AND_SERL_LENGTH_GET_FAIL, NULL);
return ret;
}
}
@@ -2278,6 +2285,7 @@ __glusterd_handle_fsm_log(rpcsvc_request_t *req)
dict = dict_new();
if (!dict) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL);
ret = -1;
goto out;
}
@@ -2286,17 +2294,17 @@ __glusterd_handle_fsm_log(rpcsvc_request_t *req)
conf = this->private;
ret = glusterd_sm_tr_log_add_to_dict(dict, &conf->op_sm_log);
} else {
- rcu_read_lock();
+ RCU_READ_LOCK;
peerinfo = glusterd_peerinfo_find_by_hostname(cli_req.name);
if (!peerinfo) {
+ RCU_READ_UNLOCK;
ret = -1;
snprintf(msg, sizeof(msg), "%s is not a peer", cli_req.name);
} else {
ret = glusterd_sm_tr_log_add_to_dict(dict, &peerinfo->sm_log);
+ RCU_READ_UNLOCK;
}
-
- rcu_read_unlock();
}
out:
@@ -2440,9 +2448,9 @@ __glusterd_handle_cluster_unlock(rpcsvc_request_t *req)
gf_msg_debug(this->name, 0, "Received UNLOCK from uuid: %s",
uuid_utoa(unlock_req.uuid));
- rcu_read_lock();
+ RCU_READ_LOCK;
ret = (glusterd_peerinfo_find_by_uuid(unlock_req.uuid) == NULL);
- rcu_read_unlock();
+ RCU_READ_UNLOCK; /* leave the read-side section entered above; the lookup result is already captured in ret */
if (ret) {
gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_PEER_NOT_FOUND,
"%s doesn't "
@@ -2504,8 +2512,8 @@ glusterd_op_stage_send_resp(rpcsvc_request_t *req, int32_t op, int32_t status,
ret = dict_allocate_and_serialize(rsp_dict, &rsp.dict.dict_val,
&rsp.dict.dict_len);
if (ret < 0) {
- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SERL_LENGTH_GET_FAIL,
- "failed to get serialized length of dict");
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ GD_MSG_DICT_ALLOC_AND_SERL_LENGTH_GET_FAIL, NULL);
return ret;
}
@@ -2544,9 +2552,8 @@ glusterd_op_commit_send_resp(rpcsvc_request_t *req, int32_t op, int32_t status,
ret = dict_allocate_and_serialize(rsp_dict, &rsp.dict.dict_val,
&rsp.dict.dict_len);
if (ret < 0) {
- gf_msg(this->name, GF_LOG_ERROR, 0,
- GD_MSG_DICT_SERL_LENGTH_GET_FAIL,
- "failed to get serialized length of dict");
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ GD_MSG_DICT_ALLOC_AND_SERL_LENGTH_GET_FAIL, NULL);
goto out;
}
}
@@ -2721,7 +2728,7 @@ __glusterd_handle_friend_update(rpcsvc_request_t *req)
{0},
};
dict_t *dict = NULL;
- char key[100] = {
+ char key[32] = {
0,
};
int keylen;
@@ -2753,11 +2760,11 @@ __glusterd_handle_friend_update(rpcsvc_request_t *req)
}
ret = 0;
- rcu_read_lock();
+ RCU_READ_LOCK;
if (glusterd_peerinfo_find(friend_req.uuid, NULL) == NULL) {
ret = -1;
}
- rcu_read_unlock();
+ RCU_READ_UNLOCK;
if (ret) {
gf_msg(this->name, GF_LOG_CRITICAL, 0, GD_MSG_REQ_FROM_UNKNOWN_PEER,
"Received friend update request "
@@ -2787,12 +2794,18 @@ __glusterd_handle_friend_update(rpcsvc_request_t *req)
}
ret = dict_get_int32n(dict, "count", SLEN("count"), &count);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
+ "Key=count", NULL);
goto out;
+ }
ret = dict_get_int32n(dict, "op", SLEN("op"), &op);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
+ "Key=op", NULL);
goto out;
+ }
if (GD_FRIEND_UPDATE_DEL == op) {
(void)glusterd_handle_friend_update_delete(dict);
@@ -2816,7 +2829,7 @@ __glusterd_handle_friend_update(rpcsvc_request_t *req)
snprintf(key, sizeof(key), "friend%d", i);
- rcu_read_lock();
+ RCU_READ_LOCK;
peerinfo = glusterd_peerinfo_find(uuid, NULL);
if (peerinfo == NULL) {
/* Create a new peer and add it to the list as there is
@@ -2861,7 +2874,7 @@ __glusterd_handle_friend_update(rpcsvc_request_t *req)
}
}
unlock:
- rcu_read_unlock();
+ RCU_READ_UNLOCK;
if (ret)
break;
@@ -2964,7 +2977,7 @@ __glusterd_handle_probe_query(rpcsvc_request_t *req)
goto out;
}
- rcu_read_lock();
+ RCU_READ_LOCK;
peerinfo = glusterd_peerinfo_find(probe_req.uuid, remote_hostname);
if ((peerinfo == NULL) && (!cds_list_empty(&conf->peers))) {
rsp.op_ret = -1;
@@ -2984,7 +2997,7 @@ __glusterd_handle_probe_query(rpcsvc_request_t *req)
rsp.op_errno = GF_PROBE_ADD_FAILED;
}
}
- rcu_read_unlock();
+ RCU_READ_UNLOCK;
respond:
gf_uuid_copy(rsp.uuid, MY_UUID);
@@ -3031,10 +3044,13 @@ __glusterd_handle_cli_profile_volume(rpcsvc_request_t *req)
0,
};
xlator_t *this = NULL;
+ glusterd_conf_t *conf = NULL;
GF_ASSERT(req);
this = THIS;
GF_ASSERT(this);
+ conf = this->private;
+ GF_VALIDATE_OR_GOTO(this->name, conf, out);
ret = xdr_to_generic(req->msg[0], &cli_req, (xdrproc_t)xdr_gf_cli_req);
if (ret < 0) {
@@ -3048,8 +3064,11 @@ __glusterd_handle_cli_profile_volume(rpcsvc_request_t *req)
if (cli_req.dict.dict_len > 0) {
dict = dict_new();
- if (!dict)
+ if (!dict) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL,
+ NULL);
goto out;
+ }
dict_unserialize(cli_req.dict.dict_val, cli_req.dict.dict_len, &dict);
}
@@ -3075,12 +3094,21 @@ __glusterd_handle_cli_profile_volume(rpcsvc_request_t *req)
goto out;
}
- ret = glusterd_op_begin(req, cli_op, dict, err_str, sizeof(err_str));
+ if (conf->op_version < GD_OP_VERSION_6_0) {
+ gf_msg_debug(this->name, 0,
+ "The cluster is operating at "
+ "version less than %d. Falling back "
+ "to op-sm framework.",
+ GD_OP_VERSION_6_0);
+ ret = glusterd_op_begin(req, cli_op, dict, err_str, sizeof(err_str));
+ glusterd_friend_sm();
+ glusterd_op_sm();
+ } else {
+ ret = glusterd_mgmt_v3_initiate_all_phases_with_brickop_phase(
+ req, cli_op, dict);
+ }
out:
- glusterd_friend_sm();
- glusterd_op_sm();
-
free(cli_req.dict.dict_val);
if (ret) {
@@ -3267,6 +3295,7 @@ __glusterd_handle_umount(rpcsvc_request_t *req)
/* check if it is allowed to umount path */
path = gf_strdup(umnt_req.path);
if (!path) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_STRDUP_FAILED, NULL);
rsp.op_errno = ENOMEM;
goto out;
}
@@ -3334,25 +3363,26 @@ glusterd_friend_remove(uuid_t uuid, char *hostname)
int ret = -1;
glusterd_peerinfo_t *peerinfo = NULL;
- rcu_read_lock();
+ RCU_READ_LOCK;
peerinfo = glusterd_peerinfo_find(uuid, hostname);
if (peerinfo == NULL) {
- rcu_read_unlock();
+ RCU_READ_UNLOCK;
goto out;
}
ret = glusterd_friend_remove_cleanup_vols(peerinfo->uuid);
+ RCU_READ_UNLOCK;
if (ret)
gf_msg(THIS->name, GF_LOG_WARNING, 0, GD_MSG_VOL_CLEANUP_FAIL,
"Volumes cleanup failed");
- rcu_read_unlock();
/* Giving up the critical section here as glusterd_peerinfo_cleanup must
* be called from outside a critical section
*/
ret = glusterd_peerinfo_cleanup(peerinfo);
out:
gf_msg_debug(THIS->name, 0, "returning %d", ret);
+ /* coverity[LOCK] */
return ret;
}
@@ -3369,6 +3399,7 @@ glusterd_rpc_create(struct rpc_clnt **rpc, dict_t *options,
GF_ASSERT(this);
GF_ASSERT(options);
+ GF_VALIDATE_OR_GOTO(this->name, rpc, out);
if (force && rpc && *rpc) {
(void)rpc_clnt_unref(*rpc);
@@ -3381,7 +3412,6 @@ glusterd_rpc_create(struct rpc_clnt **rpc, dict_t *options,
goto out;
ret = rpc_clnt_register_notify(new_rpc, notify_fn, notify_data);
- *rpc = new_rpc;
if (ret)
goto out;
ret = rpc_clnt_start(new_rpc);
@@ -3390,6 +3420,8 @@ out:
if (new_rpc) {
(void)rpc_clnt_unref(new_rpc);
}
+ } else {
+ *rpc = new_rpc;
}
gf_msg_debug(this->name, 0, "returning %d", ret);
@@ -3397,11 +3429,10 @@ out:
}
int
-glusterd_transport_inet_options_build(dict_t **options, const char *hostname,
- int port)
+glusterd_transport_inet_options_build(dict_t *dict, const char *hostname,
+ int port, char *af)
{
xlator_t *this = NULL;
- dict_t *dict = NULL;
int32_t interval = -1;
int32_t time = -1;
int32_t timeout = -1;
@@ -3409,14 +3440,14 @@ glusterd_transport_inet_options_build(dict_t **options, const char *hostname,
this = THIS;
GF_ASSERT(this);
- GF_ASSERT(options);
+ GF_ASSERT(dict);
GF_ASSERT(hostname);
if (!port)
port = GLUSTERD_DEFAULT_PORT;
/* Build default transport options */
- ret = rpc_transport_inet_options_build(&dict, hostname, port);
+ ret = rpc_transport_inet_options_build(dict, hostname, port, af);
if (ret)
goto out;
@@ -3456,7 +3487,6 @@ glusterd_transport_inet_options_build(dict_t **options, const char *hostname,
if ((interval > 0) || (time > 0))
ret = rpc_transport_keepalive_options_set(dict, interval, time,
timeout);
- *options = dict;
out:
gf_msg_debug("glusterd", 0, "Returning %d", ret);
return ret;
@@ -3470,10 +3500,19 @@ glusterd_friend_rpc_create(xlator_t *this, glusterd_peerinfo_t *peerinfo,
int ret = -1;
glusterd_peerctx_t *peerctx = NULL;
data_t *data = NULL;
+ char *af = NULL;
peerctx = GF_CALLOC(1, sizeof(*peerctx), gf_gld_mt_peerctx_t);
- if (!peerctx)
+ if (!peerctx) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_NO_MEMORY, NULL);
goto out;
+ }
+
+ options = dict_new();
+ if (!options) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL);
+ goto out;
+ }
if (args)
peerctx->args = *args;
@@ -3485,8 +3524,12 @@ glusterd_friend_rpc_create(xlator_t *this, glusterd_peerinfo_t *peerinfo,
uniquely identify a
peerinfo */
- ret = glusterd_transport_inet_options_build(&options, peerinfo->hostname,
- peerinfo->port);
+ ret = dict_get_str(this->options, "transport.address-family", &af);
+ if (ret)
+ gf_log(this->name, GF_LOG_TRACE,
+ "option transport.address-family is not set in xlator options");
+ ret = glusterd_transport_inet_options_build(options, peerinfo->hostname,
+ peerinfo->port, af);
if (ret)
goto out;
@@ -3495,6 +3538,7 @@ glusterd_friend_rpc_create(xlator_t *this, glusterd_peerinfo_t *peerinfo,
* create our RPC endpoint with the same address that the peer would
* use to reach us.
*/
+
if (this->options) {
data = dict_getn(this->options, "transport.socket.bind-address",
SLEN("transport.socket.bind-address"));
@@ -3536,6 +3580,9 @@ glusterd_friend_rpc_create(xlator_t *this, glusterd_peerinfo_t *peerinfo,
peerctx = NULL;
ret = 0;
out:
+ if (options)
+ dict_unref(options);
+
GF_FREE(peerctx);
return ret;
}
@@ -3559,6 +3606,7 @@ glusterd_friend_add(const char *hoststr, int port,
*friend = glusterd_peerinfo_new(state, uuid, hoststr, port);
if (*friend == NULL) {
ret = -1;
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_PEER_ADD_FAIL, NULL);
goto out;
}
@@ -3657,7 +3705,7 @@ glusterd_probe_begin(rpcsvc_request_t *req, const char *hoststr, int port,
GF_ASSERT(hoststr);
- rcu_read_lock();
+ RCU_READ_LOCK;
peerinfo = glusterd_peerinfo_find(NULL, hoststr);
if (peerinfo == NULL) {
@@ -3702,7 +3750,7 @@ glusterd_probe_begin(rpcsvc_request_t *req, const char *hoststr, int port,
}
out:
- rcu_read_unlock();
+ RCU_READ_UNLOCK;
gf_msg_debug("glusterd", 0, "returning %d", ret);
return ret;
}
@@ -3719,7 +3767,7 @@ glusterd_deprobe_begin(rpcsvc_request_t *req, const char *hoststr, int port,
GF_ASSERT(hoststr);
GF_ASSERT(req);
- rcu_read_lock();
+ RCU_READ_LOCK;
peerinfo = glusterd_peerinfo_find(uuid, hoststr);
if (peerinfo == NULL) {
@@ -3780,7 +3828,7 @@ glusterd_deprobe_begin(rpcsvc_request_t *req, const char *hoststr, int port,
peerinfo->detaching = _gf_true;
out:
- rcu_read_unlock();
+ RCU_READ_UNLOCK;
return ret;
}
@@ -4027,8 +4075,11 @@ set_deprobe_error_str(int op_ret, int op_errno, char *op_errstr, char *errstr,
case GF_DEPROBE_BRICK_EXIST:
snprintf(errstr, len,
- "Brick(s) with the peer "
- "%s exist in cluster",
+ "Peer %s hosts one or more bricks. If the peer is in "
+ "not recoverable state then use either replace-brick "
+ "or remove-brick command with force to remove all "
+ "bricks from the peer and attempt the peer detach "
+ "again.",
hostname);
break;
@@ -4133,19 +4184,21 @@ glusterd_list_friends(rpcsvc_request_t *req, dict_t *dict, int32_t flags)
};
int keylen;
- priv = THIS->private;
+ xlator_t *this = THIS;
+ GF_ASSERT(this);
+
+ priv = this->private;
GF_ASSERT(priv);
friends = dict_new();
if (!friends) {
- gf_msg(THIS->name, GF_LOG_ERROR, ENOMEM, GD_MSG_NO_MEMORY,
- "Out of Memory");
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL);
goto out;
}
/* Reset ret to 0, needed to prevent failure in case no peers exist */
ret = 0;
- rcu_read_lock();
+ RCU_READ_LOCK;
if (!cds_list_empty(&priv->peers)) {
cds_list_for_each_entry_rcu(entry, &priv->peers, uuid_list)
{
@@ -4156,7 +4209,7 @@ glusterd_list_friends(rpcsvc_request_t *req, dict_t *dict, int32_t flags)
}
}
unlock:
- rcu_read_unlock();
+ RCU_READ_UNLOCK;
if (ret)
goto out;
@@ -4165,24 +4218,36 @@ unlock:
keylen = snprintf(key, sizeof(key), "friend%d.uuid", count);
uuid_utoa_r(MY_UUID, my_uuid_str);
ret = dict_set_strn(friends, key, keylen, my_uuid_str);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=%s", key, NULL);
goto out;
+ }
keylen = snprintf(key, sizeof(key), "friend%d.hostname", count);
ret = dict_set_nstrn(friends, key, keylen, "localhost",
SLEN("localhost"));
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=%s", key, NULL);
goto out;
+ }
keylen = snprintf(key, sizeof(key), "friend%d.connected", count);
ret = dict_set_int32n(friends, key, keylen, 1);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=%s", key, NULL);
goto out;
+ }
}
ret = dict_set_int32n(friends, "count", SLEN("count"), count);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=count", NULL);
goto out;
+ }
ret = dict_allocate_and_serialize(friends, &rsp.friends.friends_val,
&rsp.friends.friends_len);
@@ -4354,8 +4419,11 @@ __glusterd_handle_status_volume(rpcsvc_request_t *req)
if (cli_req.dict.dict_len > 0) {
dict = dict_new();
- if (!dict)
+ if (!dict) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL,
+ NULL);
goto out;
+ }
ret = dict_unserialize(cli_req.dict.dict_val, cli_req.dict.dict_len,
&dict);
if (ret < 0) {
@@ -4429,17 +4497,6 @@ __glusterd_handle_status_volume(rpcsvc_request_t *req)
goto out;
}
- if ((cmd & GF_CLI_STATUS_TIERD) &&
- (conf->op_version < GD_OP_VERSION_3_10_0)) {
- snprintf(err_str, sizeof(err_str),
- "The cluster is operating "
- "at a lesser version than %d. Getting the status of "
- "tierd is not allowed in this state",
- GD_OP_VERSION_3_6_0);
- ret = -1;
- goto out;
- }
-
if ((cmd & GF_CLI_STATUS_SCRUB) &&
(conf->op_version < GD_OP_VERSION_3_7_0)) {
snprintf(err_str, sizeof(err_str),
@@ -4634,6 +4691,7 @@ __glusterd_handle_barrier(rpcsvc_request_t *req)
dict = dict_new();
if (!dict) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL);
ret = -1;
goto out;
}
@@ -4986,6 +5044,7 @@ out:
&rsp.dict.dict_len);
glusterd_submit_reply(req, &rsp, NULL, 0, NULL, (xdrproc_t)xdr_gf_cli_rsp);
+ GF_FREE(rsp.dict.dict_val);
GF_FREE(key_fixed);
return ret;
}
@@ -5167,12 +5226,17 @@ glusterd_print_gsync_status_by_vol(FILE *fp, glusterd_volinfo_t *volinfo)
0,
};
+ xlator_t *this = THIS;
+ GF_ASSERT(this);
+
GF_VALIDATE_OR_GOTO(THIS->name, volinfo, out);
GF_VALIDATE_OR_GOTO(THIS->name, fp, out);
gsync_rsp_dict = dict_new();
- if (!gsync_rsp_dict)
+ if (!gsync_rsp_dict) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL);
goto out;
+ }
ret = gethostname(my_hostname, sizeof(my_hostname));
if (ret) {
@@ -5199,7 +5263,7 @@ glusterd_print_snapinfo_by_vol(FILE *fp, glusterd_volinfo_t *volinfo,
glusterd_volinfo_t *tmp_vol = NULL;
glusterd_snap_t *snapinfo = NULL;
int snapcount = 0;
- char timestr[64] = {
+ char timestr[GF_TIMESTR_SIZE] = {
0,
};
char snap_status_str[STATUS_STRLEN] = {
@@ -5312,19 +5376,30 @@ glusterd_print_client_details(FILE *fp, dict_t *dict,
brick_req->op = GLUSTERD_BRICK_STATUS;
brick_req->name = "";
+ brick_req->dict.dict_val = NULL;
+ brick_req->dict.dict_len = 0;
ret = dict_set_strn(dict, "brick-name", SLEN("brick-name"),
brickinfo->path);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=brick-name", NULL);
goto out;
+ }
ret = dict_set_int32n(dict, "cmd", SLEN("cmd"), GF_CLI_STATUS_CLIENTS);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=cmd", NULL);
goto out;
+ }
ret = dict_set_strn(dict, "volname", SLEN("volname"), volinfo->volname);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=volname", NULL);
goto out;
+ }
ret = dict_allocate_and_serialize(dict, &brick_req->input.input_val,
&brick_req->input.input_len);
@@ -5455,14 +5530,11 @@ glusterd_get_state(rpcsvc_request_t *req, dict_t *dict)
uint32_t get_state_cmd = 0;
uint64_t memtotal = 0;
uint64_t memfree = 0;
- int start_index = 0;
char id_str[64] = {
0,
};
char *vol_type_str = NULL;
- char *hot_tier_type_str = NULL;
- char *cold_tier_type_str = NULL;
char transport_type_str[STATUS_STRLEN] = {
0,
@@ -5476,7 +5548,9 @@ glusterd_get_state(rpcsvc_request_t *req, dict_t *dict)
char vol_status_str[STATUS_STRLEN] = {
0,
};
-
+ char brick_status_str[STATUS_STRLEN] = {
+ 0,
+ };
this = THIS;
GF_VALIDATE_OR_GOTO(THIS->name, this, out);
@@ -5519,7 +5593,7 @@ glusterd_get_state(rpcsvc_request_t *req, dict_t *dict)
ret = dict_get_strn(dict, "filename", SLEN("filename"), &tmp_str);
if (ret) {
- now = time(NULL);
+ now = gf_time();
strftime(timestamp, sizeof(timestamp), "%Y%m%d_%H%M%S",
localtime(&now));
gf_asprintf(&filename, "%s_%s", "glusterd_state", timestamp);
@@ -5530,10 +5604,9 @@ glusterd_get_state(rpcsvc_request_t *req, dict_t *dict)
gf_asprintf(&filename, "%s", tmp_str);
}
- if (odir[odirlen - 1] != '/')
- strcat(odir, "/");
+ ret = gf_asprintf(&ofilepath, "%s%s%s", odir,
+ ((odir[odirlen - 1] != '/') ? "/" : ""), filename);
- ret = gf_asprintf(&ofilepath, "%s%s", odir, filename);
if (ret < 0) {
GF_FREE(odir);
GF_FREE(filename);
@@ -5585,6 +5658,8 @@ glusterd_get_state(rpcsvc_request_t *req, dict_t *dict)
"fetch the value of all volume options "
"for volume %s",
volinfo->volname);
+ if (vol_all_opts)
+ dict_unref(vol_all_opts);
continue;
}
@@ -5609,8 +5684,8 @@ glusterd_get_state(rpcsvc_request_t *req, dict_t *dict)
if (priv->opts)
dict_foreach(priv->opts, glusterd_print_global_options, fp);
- rcu_read_lock();
fprintf(fp, "\n[Peers]\n");
+ RCU_READ_LOCK;
cds_list_for_each_entry_rcu(peerinfo, &priv->peers, uuid_list)
{
@@ -5639,7 +5714,7 @@ glusterd_get_state(rpcsvc_request_t *req, dict_t *dict)
count_bkp = 0;
fprintf(fp, "\n");
}
- rcu_read_unlock();
+ RCU_READ_UNLOCK;
count = 0;
fprintf(fp, "\n[Volumes]\n");
@@ -5708,26 +5783,11 @@ glusterd_get_state(rpcsvc_request_t *req, dict_t *dict)
brickinfo->hostname);
/* Determine which one is the arbiter brick */
if (volinfo->arbiter_count == 1) {
- if (volinfo->type == GF_CLUSTER_TYPE_TIER) {
- if (volinfo->tier_info.cold_replica_count != 1) {
- start_index = volinfo->tier_info.hot_brick_count + 1;
- if (count >= start_index &&
- ((count - start_index + 1) %
- volinfo->tier_info.cold_replica_count ==
- 0)) {
- fprintf(fp,
- "Volume%d.Brick%d."
- "is_arbiter: 1\n",
- count_bkp, count);
- }
- }
- } else {
- if (count % volinfo->replica_count == 0) {
- fprintf(fp,
- "Volume%d.Brick%d."
- "is_arbiter: 1\n",
- count_bkp, count);
- }
+ if (count % volinfo->replica_count == 0) {
+ fprintf(fp,
+ "Volume%d.Brick%d."
+ "is_arbiter: 1\n",
+ count_bkp, count);
}
}
/* Add following information only for bricks
@@ -5740,27 +5800,21 @@ glusterd_get_state(rpcsvc_request_t *req, dict_t *dict)
brickinfo->rdma_port);
fprintf(fp, "Volume%d.Brick%d.port_registered: %d\n", count_bkp,
count, brickinfo->port_registered);
+ glusterd_brick_get_status_str(brickinfo, brick_status_str);
fprintf(fp, "Volume%d.Brick%d.status: %s\n", count_bkp, count,
- brickinfo->status ? "Started" : "Stopped");
-
- /*FIXME: This is a hacky way of figuring out whether a
- * brick belongs to the hot or cold tier */
- if (volinfo->type == GF_CLUSTER_TYPE_TIER) {
- fprintf(fp, "Volume%d.Brick%d.tier: %s\n", count_bkp, count,
- count <= volinfo->tier_info.hot_brick_count ? "Hot"
- : "Cold");
- }
+ brick_status_str);
ret = sys_statvfs(brickinfo->path, &brickstat);
if (ret) {
gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_FILE_OP_FAILED,
"statfs error: %s ", strerror(errno));
- goto out;
+ memfree = 0;
+ memtotal = 0;
+ } else {
+ memfree = brickstat.f_bfree * brickstat.f_bsize;
+ memtotal = brickstat.f_blocks * brickstat.f_bsize;
}
- memfree = brickstat.f_bfree * brickstat.f_bsize;
- memtotal = brickstat.f_blocks * brickstat.f_bsize;
-
fprintf(fp, "Volume%d.Brick%d.spacefree: %" PRIu64 "Bytes\n",
count_bkp, count, memfree);
fprintf(fp, "Volume%d.Brick%d.spacetotal: %" PRIu64 "Bytes\n",
@@ -5826,50 +5880,10 @@ glusterd_get_state(rpcsvc_request_t *req, dict_t *dict)
GF_FREE(rebal_data);
- if (volinfo->type == GF_CLUSTER_TYPE_TIER) {
- ret = glusterd_volume_get_hot_tier_type_str(volinfo,
- &hot_tier_type_str);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_STATE_STR_GET_FAILED,
- "Failed to get hot tier type for "
- "volume: %s",
- volinfo->volname);
- goto out;
- }
-
- ret = glusterd_volume_get_cold_tier_type_str(volinfo,
- &cold_tier_type_str);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_STATE_STR_GET_FAILED,
- "Failed to get cold tier type for "
- "volume: %s",
- volinfo->volname);
- goto out;
- }
-
- fprintf(fp, "Volume%d.tier_info.cold_tier_type: %s\n", count,
- cold_tier_type_str);
- fprintf(fp, "Volume%d.tier_info.cold_brick_count: %d\n", count,
- volinfo->tier_info.cold_brick_count);
- fprintf(fp, "Volume%d.tier_info.cold_replica_count: %d\n", count,
- volinfo->tier_info.cold_replica_count);
- fprintf(fp, "Volume%d.tier_info.cold_disperse_count: %d\n", count,
- volinfo->tier_info.cold_disperse_count);
- fprintf(fp, "Volume%d.tier_info.cold_dist_leaf_count: %d\n", count,
- volinfo->tier_info.cold_dist_leaf_count);
- fprintf(fp, "Volume%d.tier_info.cold_redundancy_count: %d\n", count,
- volinfo->tier_info.cold_redundancy_count);
- fprintf(fp, "Volume%d.tier_info.hot_tier_type: %s\n", count,
- hot_tier_type_str);
- fprintf(fp, "Volume%d.tier_info.hot_brick_count: %d\n", count,
- volinfo->tier_info.hot_brick_count);
- fprintf(fp, "Volume%d.tier_info.hot_replica_count: %d\n", count,
- volinfo->tier_info.hot_replica_count);
- fprintf(fp, "Volume%d.tier_info.promoted: %d\n", count,
- volinfo->tier_info.promoted);
- fprintf(fp, "Volume%d.tier_info.demoted: %d\n", count,
- volinfo->tier_info.demoted);
- }
+ fprintf(fp, "Volume%d.shd_svc.online_status: %s\n", count,
+ volinfo->shd.svc.online ? "Online" : "Offline");
+ fprintf(fp, "Volume%d.shd_svc.inited: %s\n", count,
+ volinfo->shd.svc.inited ? "True" : "False");
if (volinfo->rep_brick.src_brick && volinfo->rep_brick.dst_brick) {
fprintf(fp, "Volume%d.replace_brick.src: %s:%s\n", count,
@@ -5894,19 +5908,13 @@ glusterd_get_state(rpcsvc_request_t *req, dict_t *dict)
count = 0;
fprintf(fp, "\n[Services]\n");
-
- if (priv->shd_svc.inited) {
- fprintf(fp, "svc%d.name: %s\n", ++count, priv->shd_svc.name);
- fprintf(fp, "svc%d.online_status: %s\n\n", count,
- priv->shd_svc.online ? "Online" : "Offline");
- }
-
+#ifdef BUILD_GNFS
if (priv->nfs_svc.inited) {
fprintf(fp, "svc%d.name: %s\n", ++count, priv->nfs_svc.name);
fprintf(fp, "svc%d.online_status: %s\n\n", count,
priv->nfs_svc.online ? "Online" : "Offline");
}
-
+#endif
if (priv->bitd_svc.inited) {
fprintf(fp, "svc%d.name: %s\n", ++count, priv->bitd_svc.name);
fprintf(fp, "svc%d.online_status: %s\n\n", count,
@@ -5942,6 +5950,7 @@ out:
ret = dict_allocate_and_serialize(dict, &rsp.dict.dict_val,
&rsp.dict.dict_len);
glusterd_to_cli(req, &rsp, NULL, 0, NULL, (xdrproc_t)xdr_gf_cli_rsp, dict);
+ GF_FREE(rsp.dict.dict_val);
return ret;
}
@@ -6028,14 +6037,27 @@ get_brickinfo_from_brickid(char *brickid, glusterd_brickinfo_t **brickinfo)
uuid_t volid = {0};
int ret = -1;
+ xlator_t *this = THIS;
+ GF_ASSERT(this);
+
brickid_dup = gf_strdup(brickid);
- if (!brickid_dup)
+ if (!brickid_dup) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_STRDUP_FAILED,
+ "brick_id=%s", brickid, NULL);
goto out;
+ }
volid_str = brickid_dup;
brick = strchr(brickid_dup, ':');
- if (!volid_str || !brick)
+ if (!volid_str) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_STRCHR_FAIL, NULL);
goto out;
+ }
+
+ if (!brick) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_STRCHR_FAIL, NULL);
+ goto out;
+ }
*brick = '\0';
brick++;
@@ -6253,7 +6275,7 @@ glusterd_friend_remove_notify(glusterd_peerctx_t *peerctx, int32_t op_errno)
GF_ASSERT(peerctx);
- rcu_read_lock();
+ RCU_READ_LOCK;
peerinfo = glusterd_peerinfo_find_by_generation(peerctx->peerinfo_gen);
if (!peerinfo) {
gf_msg_debug(THIS->name, 0,
@@ -6293,7 +6315,7 @@ glusterd_friend_remove_notify(glusterd_peerctx_t *peerctx, int32_t op_errno)
}
out:
- rcu_read_unlock();
+ RCU_READ_UNLOCK;
return ret;
}
@@ -6340,7 +6362,7 @@ __glusterd_peer_rpc_notify(struct rpc_clnt *rpc, void *mydata,
event, peerctx->peername);
return 0;
}
- rcu_read_lock();
+ RCU_READ_LOCK;
peerinfo = glusterd_peerinfo_find_by_generation(peerctx->peerinfo_gen);
if (!peerinfo) {
@@ -6453,7 +6475,7 @@ __glusterd_peer_rpc_notify(struct rpc_clnt *rpc, void *mydata,
}
out:
- rcu_read_unlock();
+ RCU_READ_UNLOCK;
glusterd_friend_sm();
glusterd_op_sm();
@@ -6476,20 +6498,26 @@ glusterd_null(rpcsvc_request_t *req)
return 0;
}
-rpcsvc_actor_t gd_svc_mgmt_actors[GLUSTERD_MGMT_MAXVALUE] = {
- [GLUSTERD_MGMT_NULL] = {"NULL", GLUSTERD_MGMT_NULL, glusterd_null, NULL, 0,
- DRC_NA},
- [GLUSTERD_MGMT_CLUSTER_LOCK] = {"CLUSTER_LOCK", GLUSTERD_MGMT_CLUSTER_LOCK,
- glusterd_handle_cluster_lock, NULL, 0,
- DRC_NA},
+static rpcsvc_actor_t gd_svc_mgmt_actors[GLUSTERD_MGMT_MAXVALUE] = {
+ [GLUSTERD_MGMT_NULL] = {"NULL", glusterd_null, NULL, GLUSTERD_MGMT_NULL,
+ DRC_NA, 0},
+ [GLUSTERD_MGMT_CLUSTER_LOCK] = {"CLUSTER_LOCK",
+ glusterd_handle_cluster_lock, NULL,
+ GLUSTERD_MGMT_CLUSTER_LOCK, DRC_NA, 0},
[GLUSTERD_MGMT_CLUSTER_UNLOCK] = {"CLUSTER_UNLOCK",
- GLUSTERD_MGMT_CLUSTER_UNLOCK,
- glusterd_handle_cluster_unlock, NULL, 0,
- DRC_NA},
- [GLUSTERD_MGMT_STAGE_OP] = {"STAGE_OP", GLUSTERD_MGMT_STAGE_OP,
- glusterd_handle_stage_op, NULL, 0, DRC_NA},
- [GLUSTERD_MGMT_COMMIT_OP] = {"COMMIT_OP", GLUSTERD_MGMT_COMMIT_OP,
- glusterd_handle_commit_op, NULL, 0, DRC_NA},
+ glusterd_handle_cluster_unlock, NULL,
+ GLUSTERD_MGMT_CLUSTER_UNLOCK, DRC_NA, 0},
+ [GLUSTERD_MGMT_STAGE_OP] = {"STAGE_OP", glusterd_handle_stage_op, NULL,
+ GLUSTERD_MGMT_STAGE_OP, DRC_NA, 0},
+ [GLUSTERD_MGMT_COMMIT_OP] =
+ {
+ "COMMIT_OP",
+ glusterd_handle_commit_op,
+ NULL,
+ GLUSTERD_MGMT_COMMIT_OP,
+ DRC_NA,
+ 0,
+ },
};
struct rpcsvc_program gd_svc_mgmt_prog = {
@@ -6501,19 +6529,18 @@ struct rpcsvc_program gd_svc_mgmt_prog = {
.synctask = _gf_true,
};
-rpcsvc_actor_t gd_svc_peer_actors[GLUSTERD_FRIEND_MAXVALUE] = {
- [GLUSTERD_FRIEND_NULL] = {"NULL", GLUSTERD_MGMT_NULL, glusterd_null, NULL,
- 0, DRC_NA},
- [GLUSTERD_PROBE_QUERY] = {"PROBE_QUERY", GLUSTERD_PROBE_QUERY,
- glusterd_handle_probe_query, NULL, 0, DRC_NA},
- [GLUSTERD_FRIEND_ADD] = {"FRIEND_ADD", GLUSTERD_FRIEND_ADD,
- glusterd_handle_incoming_friend_req, NULL, 0,
- DRC_NA},
- [GLUSTERD_FRIEND_REMOVE] = {"FRIEND_REMOVE", GLUSTERD_FRIEND_REMOVE,
- glusterd_handle_incoming_unfriend_req, NULL, 0,
- DRC_NA},
- [GLUSTERD_FRIEND_UPDATE] = {"FRIEND_UPDATE", GLUSTERD_FRIEND_UPDATE,
- glusterd_handle_friend_update, NULL, 0, DRC_NA},
+static rpcsvc_actor_t gd_svc_peer_actors[GLUSTERD_FRIEND_MAXVALUE] = {
+ [GLUSTERD_FRIEND_NULL] = {"NULL", glusterd_null, NULL, GLUSTERD_MGMT_NULL,
+ DRC_NA, 0},
+ [GLUSTERD_PROBE_QUERY] = {"PROBE_QUERY", glusterd_handle_probe_query, NULL,
+ GLUSTERD_PROBE_QUERY, DRC_NA, 0},
+ [GLUSTERD_FRIEND_ADD] = {"FRIEND_ADD", glusterd_handle_incoming_friend_req,
+ NULL, GLUSTERD_FRIEND_ADD, DRC_NA, 0},
+ [GLUSTERD_FRIEND_REMOVE] = {"FRIEND_REMOVE",
+ glusterd_handle_incoming_unfriend_req, NULL,
+ GLUSTERD_FRIEND_REMOVE, DRC_NA, 0},
+ [GLUSTERD_FRIEND_UPDATE] = {"FRIEND_UPDATE", glusterd_handle_friend_update,
+ NULL, GLUSTERD_FRIEND_UPDATE, DRC_NA, 0},
};
struct rpcsvc_program gd_svc_peer_prog = {
@@ -6525,116 +6552,109 @@ struct rpcsvc_program gd_svc_peer_prog = {
.synctask = _gf_false,
};
-rpcsvc_actor_t gd_svc_cli_actors[GLUSTER_CLI_MAXVALUE] = {
- [GLUSTER_CLI_PROBE] = {"CLI_PROBE", GLUSTER_CLI_PROBE,
- glusterd_handle_cli_probe, NULL, 0, DRC_NA},
+static rpcsvc_actor_t gd_svc_cli_actors[GLUSTER_CLI_MAXVALUE] = {
+ [GLUSTER_CLI_PROBE] = {"CLI_PROBE", glusterd_handle_cli_probe, NULL,
+ GLUSTER_CLI_PROBE, DRC_NA, 0},
[GLUSTER_CLI_CREATE_VOLUME] = {"CLI_CREATE_VOLUME",
- GLUSTER_CLI_CREATE_VOLUME,
- glusterd_handle_create_volume, NULL, 0,
- DRC_NA},
+ glusterd_handle_create_volume, NULL,
+ GLUSTER_CLI_CREATE_VOLUME, DRC_NA, 0},
[GLUSTER_CLI_DEFRAG_VOLUME] = {"CLI_DEFRAG_VOLUME",
- GLUSTER_CLI_DEFRAG_VOLUME,
- glusterd_handle_defrag_volume, NULL, 0,
- DRC_NA},
- [GLUSTER_CLI_DEPROBE] = {"FRIEND_REMOVE", GLUSTER_CLI_DEPROBE,
- glusterd_handle_cli_deprobe, NULL, 0, DRC_NA},
- [GLUSTER_CLI_LIST_FRIENDS] = {"LIST_FRIENDS", GLUSTER_CLI_LIST_FRIENDS,
- glusterd_handle_cli_list_friends, NULL, 0,
- DRC_NA},
- [GLUSTER_CLI_UUID_RESET] = {"UUID_RESET", GLUSTER_CLI_UUID_RESET,
- glusterd_handle_cli_uuid_reset, NULL, 0,
- DRC_NA},
- [GLUSTER_CLI_UUID_GET] = {"UUID_GET", GLUSTER_CLI_UUID_GET,
- glusterd_handle_cli_uuid_get, NULL, 0, DRC_NA},
- [GLUSTER_CLI_START_VOLUME] = {"START_VOLUME", GLUSTER_CLI_START_VOLUME,
- glusterd_handle_cli_start_volume, NULL, 0,
- DRC_NA},
- [GLUSTER_CLI_STOP_VOLUME] = {"STOP_VOLUME", GLUSTER_CLI_STOP_VOLUME,
- glusterd_handle_cli_stop_volume, NULL, 0,
- DRC_NA},
- [GLUSTER_CLI_DELETE_VOLUME] = {"DELETE_VOLUME", GLUSTER_CLI_DELETE_VOLUME,
- glusterd_handle_cli_delete_volume, NULL, 0,
- DRC_NA},
- [GLUSTER_CLI_GET_VOLUME] = {"GET_VOLUME", GLUSTER_CLI_GET_VOLUME,
- glusterd_handle_cli_get_volume, NULL, 0,
- DRC_NA},
- [GLUSTER_CLI_ADD_BRICK] = {"ADD_BRICK", GLUSTER_CLI_ADD_BRICK,
- glusterd_handle_add_brick, NULL, 0, DRC_NA},
- [GLUSTER_CLI_ATTACH_TIER] = {"ATTACH_TIER", GLUSTER_CLI_ATTACH_TIER,
- glusterd_handle_attach_tier, NULL, 0, DRC_NA},
- [GLUSTER_CLI_REPLACE_BRICK] = {"REPLACE_BRICK", GLUSTER_CLI_REPLACE_BRICK,
- glusterd_handle_replace_brick, NULL, 0,
- DRC_NA},
- [GLUSTER_CLI_REMOVE_BRICK] = {"REMOVE_BRICK", GLUSTER_CLI_REMOVE_BRICK,
- glusterd_handle_remove_brick, NULL, 0,
- DRC_NA},
- [GLUSTER_CLI_LOG_ROTATE] = {"LOG FILENAME", GLUSTER_CLI_LOG_ROTATE,
- glusterd_handle_log_rotate, NULL, 0, DRC_NA},
- [GLUSTER_CLI_SET_VOLUME] = {"SET_VOLUME", GLUSTER_CLI_SET_VOLUME,
- glusterd_handle_set_volume, NULL, 0, DRC_NA},
- [GLUSTER_CLI_SYNC_VOLUME] = {"SYNC_VOLUME", GLUSTER_CLI_SYNC_VOLUME,
- glusterd_handle_sync_volume, NULL, 0, DRC_NA},
- [GLUSTER_CLI_RESET_VOLUME] = {"RESET_VOLUME", GLUSTER_CLI_RESET_VOLUME,
- glusterd_handle_reset_volume, NULL, 0,
- DRC_NA},
- [GLUSTER_CLI_FSM_LOG] = {"FSM_LOG", GLUSTER_CLI_FSM_LOG,
- glusterd_handle_fsm_log, NULL, 0, DRC_NA},
- [GLUSTER_CLI_GSYNC_SET] = {"GSYNC_SET", GLUSTER_CLI_GSYNC_SET,
- glusterd_handle_gsync_set, NULL, 0, DRC_NA},
- [GLUSTER_CLI_PROFILE_VOLUME] = {"STATS_VOLUME", GLUSTER_CLI_PROFILE_VOLUME,
- glusterd_handle_cli_profile_volume, NULL, 0,
- DRC_NA},
- [GLUSTER_CLI_QUOTA] = {"QUOTA", GLUSTER_CLI_QUOTA, glusterd_handle_quota,
- NULL, 0, DRC_NA},
- [GLUSTER_CLI_GETWD] = {"GETWD", GLUSTER_CLI_GETWD, glusterd_handle_getwd,
- NULL, 1, DRC_NA},
- [GLUSTER_CLI_STATUS_VOLUME] = {"STATUS_VOLUME", GLUSTER_CLI_STATUS_VOLUME,
- glusterd_handle_status_volume, NULL, 0,
- DRC_NA},
- [GLUSTER_CLI_MOUNT] = {"MOUNT", GLUSTER_CLI_MOUNT, glusterd_handle_mount,
- NULL, 1, DRC_NA},
- [GLUSTER_CLI_UMOUNT] = {"UMOUNT", GLUSTER_CLI_UMOUNT,
- glusterd_handle_umount, NULL, 1, DRC_NA},
- [GLUSTER_CLI_HEAL_VOLUME] = {"HEAL_VOLUME", GLUSTER_CLI_HEAL_VOLUME,
- glusterd_handle_cli_heal_volume, NULL, 0,
- DRC_NA},
+ glusterd_handle_defrag_volume, NULL,
+ GLUSTER_CLI_DEFRAG_VOLUME, DRC_NA, 0},
+ [GLUSTER_CLI_DEPROBE] = {"FRIEND_REMOVE", glusterd_handle_cli_deprobe, NULL,
+ GLUSTER_CLI_DEPROBE, DRC_NA, 0},
+ [GLUSTER_CLI_LIST_FRIENDS] = {"LIST_FRIENDS",
+ glusterd_handle_cli_list_friends, NULL,
+ GLUSTER_CLI_LIST_FRIENDS, DRC_NA, 0},
+ [GLUSTER_CLI_UUID_RESET] = {"UUID_RESET", glusterd_handle_cli_uuid_reset,
+ NULL, GLUSTER_CLI_UUID_RESET, DRC_NA, 0},
+ [GLUSTER_CLI_UUID_GET] = {"UUID_GET", glusterd_handle_cli_uuid_get, NULL,
+ GLUSTER_CLI_UUID_GET, DRC_NA, 0},
+ [GLUSTER_CLI_START_VOLUME] = {"START_VOLUME",
+ glusterd_handle_cli_start_volume, NULL,
+ GLUSTER_CLI_START_VOLUME, DRC_NA, 0},
+ [GLUSTER_CLI_STOP_VOLUME] = {"STOP_VOLUME", glusterd_handle_cli_stop_volume,
+ NULL, GLUSTER_CLI_STOP_VOLUME, DRC_NA, 0},
+ [GLUSTER_CLI_DELETE_VOLUME] = {"DELETE_VOLUME",
+ glusterd_handle_cli_delete_volume, NULL,
+ GLUSTER_CLI_DELETE_VOLUME, DRC_NA, 0},
+ [GLUSTER_CLI_GET_VOLUME] = {"GET_VOLUME", glusterd_handle_cli_get_volume,
+ NULL, GLUSTER_CLI_GET_VOLUME, DRC_NA, 0},
+ [GLUSTER_CLI_ADD_BRICK] = {"ADD_BRICK", glusterd_handle_add_brick, NULL,
+ GLUSTER_CLI_ADD_BRICK, DRC_NA, 0},
+ [GLUSTER_CLI_ATTACH_TIER] = {"ATTACH_TIER", glusterd_handle_attach_tier,
+ NULL, GLUSTER_CLI_ATTACH_TIER, DRC_NA, 0},
+ [GLUSTER_CLI_REPLACE_BRICK] = {"REPLACE_BRICK",
+ glusterd_handle_replace_brick, NULL,
+ GLUSTER_CLI_REPLACE_BRICK, DRC_NA, 0},
+ [GLUSTER_CLI_REMOVE_BRICK] = {"REMOVE_BRICK", glusterd_handle_remove_brick,
+ NULL, GLUSTER_CLI_REMOVE_BRICK, DRC_NA, 0},
+ [GLUSTER_CLI_LOG_ROTATE] = {"LOG FILENAME", glusterd_handle_log_rotate,
+ NULL, GLUSTER_CLI_LOG_ROTATE, DRC_NA, 0},
+ [GLUSTER_CLI_SET_VOLUME] = {"SET_VOLUME", glusterd_handle_set_volume, NULL,
+ GLUSTER_CLI_SET_VOLUME, DRC_NA, 0},
+ [GLUSTER_CLI_SYNC_VOLUME] = {"SYNC_VOLUME", glusterd_handle_sync_volume,
+ NULL, GLUSTER_CLI_SYNC_VOLUME, DRC_NA, 0},
+ [GLUSTER_CLI_RESET_VOLUME] = {"RESET_VOLUME", glusterd_handle_reset_volume,
+ NULL, GLUSTER_CLI_RESET_VOLUME, DRC_NA, 0},
+ [GLUSTER_CLI_FSM_LOG] = {"FSM_LOG", glusterd_handle_fsm_log, NULL,
+ GLUSTER_CLI_FSM_LOG, DRC_NA, 0},
+ [GLUSTER_CLI_GSYNC_SET] = {"GSYNC_SET", glusterd_handle_gsync_set, NULL,
+ GLUSTER_CLI_GSYNC_SET, DRC_NA, 0},
+ [GLUSTER_CLI_PROFILE_VOLUME] = {"STATS_VOLUME",
+ glusterd_handle_cli_profile_volume, NULL,
+ GLUSTER_CLI_PROFILE_VOLUME, DRC_NA, 0},
+ [GLUSTER_CLI_QUOTA] = {"QUOTA", glusterd_handle_quota, NULL,
+ GLUSTER_CLI_QUOTA, DRC_NA, 0},
+ [GLUSTER_CLI_GETWD] = {"GETWD", glusterd_handle_getwd, NULL,
+ GLUSTER_CLI_GETWD, DRC_NA, 1},
+ [GLUSTER_CLI_STATUS_VOLUME] = {"STATUS_VOLUME",
+ glusterd_handle_status_volume, NULL,
+ GLUSTER_CLI_STATUS_VOLUME, DRC_NA, 0},
+ [GLUSTER_CLI_MOUNT] = {"MOUNT", glusterd_handle_mount, NULL,
+ GLUSTER_CLI_MOUNT, DRC_NA, 1},
+ [GLUSTER_CLI_UMOUNT] = {"UMOUNT", glusterd_handle_umount, NULL,
+ GLUSTER_CLI_UMOUNT, DRC_NA, 1},
+ [GLUSTER_CLI_HEAL_VOLUME] = {"HEAL_VOLUME", glusterd_handle_cli_heal_volume,
+ NULL, GLUSTER_CLI_HEAL_VOLUME, DRC_NA, 0},
[GLUSTER_CLI_STATEDUMP_VOLUME] = {"STATEDUMP_VOLUME",
- GLUSTER_CLI_STATEDUMP_VOLUME,
glusterd_handle_cli_statedump_volume,
- NULL, 0, DRC_NA},
- [GLUSTER_CLI_LIST_VOLUME] = {"LIST_VOLUME", GLUSTER_CLI_LIST_VOLUME,
- glusterd_handle_cli_list_volume, NULL, 0,
- DRC_NA},
+ NULL, GLUSTER_CLI_STATEDUMP_VOLUME,
+ DRC_NA, 0},
+ [GLUSTER_CLI_LIST_VOLUME] = {"LIST_VOLUME", glusterd_handle_cli_list_volume,
+ NULL, GLUSTER_CLI_LIST_VOLUME, DRC_NA, 0},
[GLUSTER_CLI_CLRLOCKS_VOLUME] = {"CLEARLOCKS_VOLUME",
- GLUSTER_CLI_CLRLOCKS_VOLUME,
glusterd_handle_cli_clearlocks_volume,
- NULL, 0, DRC_NA},
- [GLUSTER_CLI_COPY_FILE] = {"COPY_FILE", GLUSTER_CLI_COPY_FILE,
- glusterd_handle_copy_file, NULL, 0, DRC_NA},
- [GLUSTER_CLI_SYS_EXEC] = {"SYS_EXEC", GLUSTER_CLI_SYS_EXEC,
- glusterd_handle_sys_exec, NULL, 0, DRC_NA},
- [GLUSTER_CLI_SNAP] = {"SNAP", GLUSTER_CLI_SNAP, glusterd_handle_snapshot,
- NULL, 0, DRC_NA},
- [GLUSTER_CLI_BARRIER_VOLUME] = {"BARRIER_VOLUME",
- GLUSTER_CLI_BARRIER_VOLUME,
- glusterd_handle_barrier, NULL, 0, DRC_NA},
- [GLUSTER_CLI_GET_VOL_OPT] = {"GET_VOL_OPT", GLUSTER_CLI_GET_VOL_OPT,
- glusterd_handle_get_vol_opt, NULL, 0, DRC_NA},
- [GLUSTER_CLI_BITROT] = {"BITROT", GLUSTER_CLI_BITROT,
- glusterd_handle_bitrot, NULL, 0, DRC_NA},
- [GLUSTER_CLI_GET_STATE] = {"GET_STATE", GLUSTER_CLI_GET_STATE,
- glusterd_handle_get_state, NULL, 0, DRC_NA},
- [GLUSTER_CLI_RESET_BRICK] = {"RESET_BRICK", GLUSTER_CLI_RESET_BRICK,
- glusterd_handle_reset_brick, NULL, 0, DRC_NA},
- [GLUSTER_CLI_TIER] = {"TIER", GLUSTER_CLI_TIER, glusterd_handle_tier, NULL,
- 0, DRC_NA},
+ NULL, GLUSTER_CLI_CLRLOCKS_VOLUME, DRC_NA,
+ 0},
+ [GLUSTER_CLI_COPY_FILE] = {"COPY_FILE", glusterd_handle_copy_file, NULL,
+ GLUSTER_CLI_COPY_FILE, DRC_NA, 0},
+ [GLUSTER_CLI_SYS_EXEC] = {"SYS_EXEC", glusterd_handle_sys_exec, NULL,
+ GLUSTER_CLI_SYS_EXEC, DRC_NA, 0},
+ [GLUSTER_CLI_SNAP] = {"SNAP", glusterd_handle_snapshot, NULL,
+ GLUSTER_CLI_SNAP, DRC_NA, 0},
+ [GLUSTER_CLI_BARRIER_VOLUME] = {"BARRIER_VOLUME", glusterd_handle_barrier,
+ NULL, GLUSTER_CLI_BARRIER_VOLUME, DRC_NA,
+ 0},
+ [GLUSTER_CLI_GANESHA] = {"GANESHA", glusterd_handle_ganesha_cmd, NULL,
+ GLUSTER_CLI_GANESHA, DRC_NA, 0},
+ [GLUSTER_CLI_GET_VOL_OPT] = {"GET_VOL_OPT", glusterd_handle_get_vol_opt,
+ NULL, GLUSTER_CLI_GET_VOL_OPT, DRC_NA, 0},
+ [GLUSTER_CLI_BITROT] = {"BITROT", glusterd_handle_bitrot, NULL,
+ GLUSTER_CLI_BITROT, DRC_NA, 0},
+ [GLUSTER_CLI_GET_STATE] = {"GET_STATE", glusterd_handle_get_state, NULL,
+ GLUSTER_CLI_GET_STATE, DRC_NA, 0},
+ [GLUSTER_CLI_RESET_BRICK] = {"RESET_BRICK", glusterd_handle_reset_brick,
+ NULL, GLUSTER_CLI_RESET_BRICK, DRC_NA, 0},
+ [GLUSTER_CLI_TIER] = {"TIER", glusterd_handle_tier, NULL, GLUSTER_CLI_TIER,
+ DRC_NA, 0},
[GLUSTER_CLI_REMOVE_TIER_BRICK] = {"REMOVE_TIER_BRICK",
- GLUSTER_CLI_REMOVE_TIER_BRICK,
- glusterd_handle_tier, NULL, 0, DRC_NA},
+ glusterd_handle_tier, NULL,
+ GLUSTER_CLI_REMOVE_TIER_BRICK, DRC_NA,
+ 0},
[GLUSTER_CLI_ADD_TIER_BRICK] = {"ADD_TIER_BRICK",
- GLUSTER_CLI_ADD_TIER_BRICK,
- glusterd_handle_add_tier_brick, NULL, 0,
- DRC_NA},
+ glusterd_handle_add_tier_brick, NULL,
+ GLUSTER_CLI_ADD_TIER_BRICK, DRC_NA, 0},
};
struct rpcsvc_program gd_svc_cli_prog = {
@@ -6651,27 +6671,25 @@ struct rpcsvc_program gd_svc_cli_prog = {
* read only queries, the only exception being MOUNT/UMOUNT which is required
* by geo-replication to support unprivileged master -> slave sessions.
*/
-rpcsvc_actor_t gd_svc_cli_trusted_actors[GLUSTER_CLI_MAXVALUE] = {
- [GLUSTER_CLI_LIST_FRIENDS] = {"LIST_FRIENDS", GLUSTER_CLI_LIST_FRIENDS,
- glusterd_handle_cli_list_friends, NULL, 0,
- DRC_NA},
- [GLUSTER_CLI_UUID_GET] = {"UUID_GET", GLUSTER_CLI_UUID_GET,
- glusterd_handle_cli_uuid_get, NULL, 0, DRC_NA},
- [GLUSTER_CLI_GET_VOLUME] = {"GET_VOLUME", GLUSTER_CLI_GET_VOLUME,
- glusterd_handle_cli_get_volume, NULL, 0,
- DRC_NA},
- [GLUSTER_CLI_GETWD] = {"GETWD", GLUSTER_CLI_GETWD, glusterd_handle_getwd,
- NULL, 1, DRC_NA},
- [GLUSTER_CLI_STATUS_VOLUME] = {"STATUS_VOLUME", GLUSTER_CLI_STATUS_VOLUME,
- glusterd_handle_status_volume, NULL, 0,
- DRC_NA},
- [GLUSTER_CLI_LIST_VOLUME] = {"LIST_VOLUME", GLUSTER_CLI_LIST_VOLUME,
- glusterd_handle_cli_list_volume, NULL, 0,
- DRC_NA},
- [GLUSTER_CLI_MOUNT] = {"MOUNT", GLUSTER_CLI_MOUNT, glusterd_handle_mount,
- NULL, 1, DRC_NA},
- [GLUSTER_CLI_UMOUNT] = {"UMOUNT", GLUSTER_CLI_UMOUNT,
- glusterd_handle_umount, NULL, 1, DRC_NA},
+static rpcsvc_actor_t gd_svc_cli_trusted_actors[GLUSTER_CLI_MAXVALUE] = {
+ [GLUSTER_CLI_LIST_FRIENDS] = {"LIST_FRIENDS",
+ glusterd_handle_cli_list_friends, NULL,
+ GLUSTER_CLI_LIST_FRIENDS, DRC_NA, 0},
+ [GLUSTER_CLI_UUID_GET] = {"UUID_GET", glusterd_handle_cli_uuid_get, NULL,
+ GLUSTER_CLI_UUID_GET, DRC_NA, 0},
+ [GLUSTER_CLI_GET_VOLUME] = {"GET_VOLUME", glusterd_handle_cli_get_volume,
+ NULL, GLUSTER_CLI_GET_VOLUME, DRC_NA, 0},
+ [GLUSTER_CLI_GETWD] = {"GETWD", glusterd_handle_getwd, NULL,
+ GLUSTER_CLI_GETWD, DRC_NA, 1},
+ [GLUSTER_CLI_STATUS_VOLUME] = {"STATUS_VOLUME",
+ glusterd_handle_status_volume, NULL,
+ GLUSTER_CLI_STATUS_VOLUME, DRC_NA, 0},
+ [GLUSTER_CLI_LIST_VOLUME] = {"LIST_VOLUME", glusterd_handle_cli_list_volume,
+ NULL, GLUSTER_CLI_LIST_VOLUME, DRC_NA, 0},
+ [GLUSTER_CLI_MOUNT] = {"MOUNT", glusterd_handle_mount, NULL,
+ GLUSTER_CLI_MOUNT, DRC_NA, 1},
+ [GLUSTER_CLI_UMOUNT] = {"UMOUNT", glusterd_handle_umount, NULL,
+ GLUSTER_CLI_UMOUNT, DRC_NA, 1},
};
struct rpcsvc_program gd_svc_cli_trusted_progs = {
@@ -6682,3 +6700,14 @@ struct rpcsvc_program gd_svc_cli_trusted_progs = {
.actors = gd_svc_cli_trusted_actors,
.synctask = _gf_true,
};
+
+/* As we can't remove the handlers, the tier-based handlers are
+ * kept in this file, since gluster-tier.c and the other tier.c
+ * files no longer exist.
+ */
+
+int
+glusterd_handle_tier(rpcsvc_request_t *req)
+{
+ return 0;
+}
diff --git a/xlators/mgmt/glusterd/src/glusterd-handshake.c b/xlators/mgmt/glusterd/src/glusterd-handshake.c
index 53b500f4986..d96e35503dd 100644
--- a/xlators/mgmt/glusterd/src/glusterd-handshake.c
+++ b/xlators/mgmt/glusterd/src/glusterd-handshake.c
@@ -8,11 +8,11 @@
cases as published by the Free Software Foundation.
*/
-#include "xlator.h"
-#include "defaults.h"
-#include "glusterfs.h"
-#include "syscall.h"
-#include "compat-errno.h"
+#include <glusterfs/xlator.h>
+#include <glusterfs/defaults.h>
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/syscall.h>
+#include <glusterfs/compat-errno.h>
#include "glusterd.h"
#include "glusterd-utils.h"
@@ -21,7 +21,6 @@
#include "glusterd-snapshot-utils.h"
#include "glusterd-svc-mgmt.h"
#include "glusterd-snapd-svc-helper.h"
-#include "glusterd-tierd-svc-helper.h"
#include "glusterd-volgen.h"
#include "glusterd-quotad-svc.h"
#include "glusterd-messages.h"
@@ -30,6 +29,7 @@
#include "rpcsvc.h"
#include "rpc-common-xdr.h"
#include "glusterd-gfproxyd-svc-helper.h"
+#include "glusterd-shd-svc-helper.h"
extern struct rpc_clnt_program gd_peer_prog;
extern struct rpc_clnt_program gd_mgmt_prog;
@@ -111,6 +111,8 @@ get_snap_volname_and_volinfo(const char *volpath, char **volname,
volfile_token = strtok_r(NULL, "/", &save_ptr);
*volname = gf_strdup(volfile_token);
if (NULL == *volname) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_STRDUP_FAILED,
+ "Volname=%s", volfile_token, NULL);
ret = -1;
goto out;
}
@@ -202,7 +204,7 @@ out:
size_t
build_volfile_path(char *volume_id, char *path, size_t path_len,
- char *trusted_str)
+ char *trusted_str, dict_t *dict)
{
struct stat stbuf = {
0,
@@ -236,6 +238,7 @@ build_volfile_path(char *volume_id, char *path, size_t path_len,
if (volid_ptr) {
volid_ptr = strchr(volid_ptr, '/');
if (!volid_ptr) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_STRCHR_FAIL, NULL);
ret = -1;
goto out;
}
@@ -252,45 +255,49 @@ build_volfile_path(char *volume_id, char *path, size_t path_len,
goto out;
}
- volid_ptr = strstr(volume_id, "tierd/");
+ volid_ptr = strstr(volume_id, "gluster/");
if (volid_ptr) {
volid_ptr = strchr(volid_ptr, '/');
if (!volid_ptr) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_STRCHR_FAIL, NULL);
ret = -1;
goto out;
}
volid_ptr++;
- ret = glusterd_volinfo_find(volid_ptr, &volinfo);
- if (ret == -1) {
- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLINFO_GET_FAIL,
- "Couldn't find volinfo");
- goto out;
- }
- glusterd_svc_build_tierd_volfile_path(volinfo, path, path_len);
+ glusterd_svc_build_volfile_path(volid_ptr, priv->workdir, path,
+ path_len);
ret = 0;
goto out;
}
- volid_ptr = strstr(volume_id, "gluster/");
+ volid_ptr = strstr(volume_id, "gfproxy-client/");
if (volid_ptr) {
volid_ptr = strchr(volid_ptr, '/');
if (!volid_ptr) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_STRCHR_FAIL, NULL);
ret = -1;
goto out;
}
volid_ptr++;
- glusterd_svc_build_volfile_path(volid_ptr, priv->workdir, path,
- path_len);
+ ret = glusterd_volinfo_find(volid_ptr, &volinfo);
+ if (ret == -1) {
+ gf_log(this->name, GF_LOG_ERROR, "Couldn't find volinfo");
+ goto out;
+ }
+
+ glusterd_get_gfproxy_client_volfile(volinfo, path, path_len);
+
ret = 0;
goto out;
}
- volid_ptr = strstr(volume_id, "gfproxy-client/");
+ volid_ptr = strstr(volume_id, "gfproxyd/");
if (volid_ptr) {
volid_ptr = strchr(volid_ptr, '/');
if (!volid_ptr) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_STRCHR_FAIL, NULL);
ret = -1;
goto out;
}
@@ -302,16 +309,16 @@ build_volfile_path(char *volume_id, char *path, size_t path_len,
goto out;
}
- glusterd_get_gfproxy_client_volfile(volinfo, path, path_len);
-
+ glusterd_svc_build_gfproxyd_volfile_path(volinfo, path, path_len);
ret = 0;
goto out;
}
- volid_ptr = strstr(volume_id, "gfproxyd/");
+ volid_ptr = strstr(volume_id, "shd/");
if (volid_ptr) {
volid_ptr = strchr(volid_ptr, '/');
if (!volid_ptr) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_STRCHR_FAIL, NULL);
ret = -1;
goto out;
}
@@ -319,11 +326,19 @@ build_volfile_path(char *volume_id, char *path, size_t path_len,
ret = glusterd_volinfo_find(volid_ptr, &volinfo);
if (ret == -1) {
- gf_log(this->name, GF_LOG_ERROR, "Couldn't find volinfo");
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLINFO_GET_FAIL,
+ "Couldn't find volinfo for volid=%s", volid_ptr);
goto out;
}
- glusterd_svc_build_gfproxyd_volfile_path(volinfo, path, path_len);
+ glusterd_svc_build_shd_volfile_path(volinfo, path, path_len);
+
+ ret = glusterd_svc_set_shd_pidfile(volinfo, dict);
+ if (ret == -1) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Couldn't set pidfile in dict for volid=%s", volid_ptr);
+ goto out;
+ }
ret = 0;
goto out;
}
@@ -358,6 +373,7 @@ build_volfile_path(char *volume_id, char *path, size_t path_len,
if (volid_ptr) {
volid_ptr = strchr(volid_ptr, '/');
if (!volid_ptr) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_STRCHR_FAIL, NULL);
ret = -1;
goto out;
}
@@ -378,6 +394,7 @@ build_volfile_path(char *volume_id, char *path, size_t path_len,
if (volid_ptr) {
volid_ptr = strchr(volid_ptr, '/');
if (!volid_ptr) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_STRCHR_FAIL, NULL);
ret = -1;
goto out;
}
@@ -394,6 +411,8 @@ build_volfile_path(char *volume_id, char *path, size_t path_len,
/* Split the volume name */
vol = strtok_r(dup_volname, ".", &save_ptr);
if (!vol) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_SPLIT_FAIL,
+ "Volume name=%s", dup_volname, NULL);
ret = -1;
goto out;
}
@@ -438,18 +457,25 @@ build_volfile_path(char *volume_id, char *path, size_t path_len,
if (ret) {
dup_volname = gf_strdup(volid_ptr);
if (!dup_volname) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_STRDUP_FAILED,
+ "Volume name=%s", volid_ptr, NULL);
ret = -1;
goto out;
}
/* Split the volume name */
vol = strtok_r(dup_volname, ".", &save_ptr);
if (!vol) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_SPLIT_FAIL,
+ "Volume name=%s", dup_volname, NULL);
ret = -1;
goto out;
}
ret = glusterd_volinfo_find(vol, &volinfo);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_VOLINFO_GET_FAIL,
+ NULL);
goto out;
+ }
}
gotvolinfo:
@@ -458,8 +484,10 @@ gotvolinfo:
ret = snprintf(path, path_len, "%s/%s/%s.vol", path_prefix,
volinfo->volname, volid_ptr);
- if (ret == -1)
+ if (ret == -1) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_COPY_FAIL, NULL);
goto out;
+ }
ret = sys_stat(path, &stbuf);
@@ -514,12 +542,14 @@ glusterd_get_args_from_dict(gf_getspec_req *args, peer_info_t *peerinfo,
GF_ASSERT(peerinfo);
if (!args->xdata.xdata_len) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_INVALID_ARGUMENT, NULL);
ret = 0;
goto out;
}
dict = dict_new();
if (!dict) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL);
ret = -1;
goto out;
}
@@ -553,6 +583,8 @@ glusterd_get_args_from_dict(gf_getspec_req *args, peer_info_t *peerinfo,
}
*brick_name = gf_strdup(name);
if (*brick_name == NULL) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_STRDUP_FAILED,
+ "Brick_name=%s", name, NULL);
ret = -1;
goto out;
}
@@ -898,14 +930,27 @@ __server_getspec(rpcsvc_request_t *req)
char addrstr[RPCSVC_PEER_STRLEN] = {0};
peer_info_t *peerinfo = NULL;
xlator_t *this = NULL;
+ dict_t *dict = NULL;
+ glusterd_peerinfo_t *peer = NULL;
+ glusterd_conf_t *conf = NULL;
+ int peer_cnt = 0;
+ char *peer_hosts = NULL;
+ char *tmp_str = NULL;
+ char portstr[10] = {
+ 0,
+ };
+ int len = 0;
this = THIS;
GF_ASSERT(this);
+ conf = this->private;
ret = xdr_to_generic(req->msg[0], &args, (xdrproc_t)xdr_gf_getspec_req);
if (ret < 0) {
// failed to decode msg;
req->rpc_err = GARBAGE_ARGS;
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_REQ_DECODE_FAIL,
+ "Failed to decode the message");
goto fail;
}
@@ -920,6 +965,9 @@ __server_getspec(rpcsvc_request_t *req)
goto fail;
}
+ gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_MOUNT_REQ_RCVD,
+ "Received mount request for volume %s", volume);
+
/* Need to strip leading '/' from volnames. This was introduced to
* support nfs style mount parameters for native gluster mount
*/
@@ -931,7 +979,7 @@ __server_getspec(rpcsvc_request_t *req)
volume);
if (ret < 0 || ret >= sizeof(peerinfo->volname)) {
gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLINFO_GET_FAIL,
- "peerinfo->volname %s truncated or error occured: "
+ "peerinfo->volname %s truncated or error occurred: "
"(ret: %d)",
peerinfo->volname, ret);
ret = -1;
@@ -950,11 +998,22 @@ __server_getspec(rpcsvc_request_t *req)
goto fail;
}
+ dict = dict_new();
+ if (!dict) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL);
+ ret = -ENOMEM;
+ goto fail;
+ }
+
trans = req->trans;
/* addrstr will be empty for cli socket connections */
ret = rpcsvc_transport_peername(trans, (char *)&addrstr, sizeof(addrstr));
- if (ret)
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0,
+ GD_MSG_RPC_TRANSPORT_GET_PEERNAME_FAIL,
+ "Failed to get the peername");
goto fail;
+ }
tmp = strrchr(addrstr, ':');
if (tmp)
@@ -968,12 +1027,61 @@ __server_getspec(rpcsvc_request_t *req)
*/
if (strlen(addrstr) == 0 || gf_is_local_addr(addrstr)) {
ret = build_volfile_path(volume, filename, sizeof(filename),
- TRUSTED_PREFIX);
+ TRUSTED_PREFIX, dict);
} else {
- ret = build_volfile_path(volume, filename, sizeof(filename), NULL);
+ ret = build_volfile_path(volume, filename, sizeof(filename), NULL,
+ dict);
+ }
+
+ RCU_READ_LOCK;
+ cds_list_for_each_entry_rcu(peer, &conf->peers, uuid_list)
+ {
+ if (!peer->connected)
+ continue;
+ if (!peer_hosts) {
+ if (peer->port) {
+ snprintf(portstr, sizeof(portstr), "%d", peer->port);
+ } else {
+ snprintf(portstr, sizeof(portstr), "%d", GLUSTERD_DEFAULT_PORT);
+ }
+ len = strlen(peer->hostname) + strlen(portstr) + 3;
+ tmp_str = GF_CALLOC(1, len, gf_gld_mt_char);
+ snprintf(tmp_str, len, "%s%s%s%s", peer->hostname, ":", portstr,
+ " ");
+ peer_hosts = tmp_str;
+ } else {
+ len = strlen(peer_hosts) + strlen(peer->hostname) +
+ strlen(portstr) + 3;
+ tmp_str = GF_CALLOC(1, len, gf_gld_mt_char);
+ snprintf(tmp_str, len, "%s%s%s%s%s", peer_hosts, peer->hostname,
+ ":", portstr, " ");
+ GF_FREE(peer_hosts);
+ peer_hosts = tmp_str;
+ }
+ peer_cnt++;
+ }
+ RCU_READ_UNLOCK;
+ if (peer_cnt) {
+ op_ret = dict_set_str(dict, GLUSTERD_BRICK_SERVERS, peer_hosts);
+ if (op_ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "failed to set peer_host in dict");
+ ret = op_ret;
+ goto fail;
+ }
}
if (ret == 0) {
+ if (dict->count > 0) {
+ ret = dict_allocate_and_serialize(dict, &rsp.xdata.xdata_val,
+ &rsp.xdata.xdata_len);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ GD_MSG_DICT_ALLOC_AND_SERL_LENGTH_GET_FAIL, NULL);
+ goto fail;
+ }
+ }
+
/* to allocate the proper buffer to hold the file data */
ret = sys_stat(filename, &stbuf);
if (ret < 0) {
@@ -990,6 +1098,7 @@ __server_getspec(rpcsvc_request_t *req)
}
ret = file_len = stbuf.st_size;
} else {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_PEER_NOT_FOUND, NULL);
op_errno = ENOENT;
goto fail;
}
@@ -997,6 +1106,7 @@ __server_getspec(rpcsvc_request_t *req)
if (file_len) {
rsp.spec = CALLOC(file_len + 1, sizeof(char));
if (!rsp.spec) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_NO_MEMORY, NULL);
ret = -1;
op_errno = ENOMEM;
goto fail;
@@ -1015,7 +1125,6 @@ __server_getspec(rpcsvc_request_t *req)
goto fail;
}
}
-
/* convert to XDR */
fail:
if (spec_fd >= 0)
@@ -1024,6 +1133,9 @@ fail:
GF_FREE(brick_name);
rsp.op_ret = ret;
+ if (rsp.op_ret < 0)
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_MOUNT_REQ_FAIL,
+ "Failed to mount the volume");
if (op_errno)
rsp.op_errno = gf_errno_to_error(op_errno);
@@ -1035,9 +1147,18 @@ fail:
(xdrproc_t)xdr_gf_getspec_rsp);
free(args.key); // malloced by xdr
free(rsp.spec);
+
+ if (peer_hosts)
+ GF_FREE(peer_hosts);
+ if (dict)
+ dict_unref(dict);
+
if (args.xdata.xdata_val)
free(args.xdata.xdata_val);
+ if (rsp.xdata.xdata_val)
+ GF_FREE(rsp.xdata.xdata_val);
+
return 0;
}
@@ -1064,13 +1185,17 @@ __server_event_notify(rpcsvc_request_t *req)
(xdrproc_t)xdr_gf_event_notify_req);
if (ret < 0) {
req->rpc_err = GARBAGE_ARGS;
+ gf_smsg("glusterd", GF_LOG_ERROR, errno, GD_MSG_GARBAGE_ARGS, NULL);
goto fail;
}
if (args.dict.dict_len) {
dict = dict_new();
- if (!dict)
+ if (!dict) {
+ gf_smsg("glusterd", GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL,
+ NULL);
return ret;
+ }
ret = dict_unserialize(args.dict.dict_val, args.dict.dict_len, &dict);
if (ret) {
gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_DICT_UNSERIALIZE_FAIL,
@@ -1189,9 +1314,9 @@ gd_validate_mgmt_hndsk_req(rpcsvc_request_t *req, dict_t *dict)
*/
if (!ret) {
gf_uuid_parse(uuid_str, peer_uuid);
- rcu_read_lock();
+ RCU_READ_LOCK;
ret = (glusterd_peerinfo_find(peer_uuid, NULL) != NULL);
- rcu_read_unlock();
+ RCU_READ_UNLOCK;
if (ret)
return _gf_true;
}
@@ -1207,7 +1332,7 @@ gd_validate_mgmt_hndsk_req(rpcsvc_request_t *req, dict_t *dict)
* is available in the peerinfo list but the uuid has changed of the
* node due to a reinstall, in that case the validation should fail!
*/
- rcu_read_lock();
+ RCU_READ_LOCK;
if (!uuid_str) {
ret = (glusterd_peerinfo_find(NULL, hostname) == NULL);
} else {
@@ -1225,7 +1350,7 @@ gd_validate_mgmt_hndsk_req(rpcsvc_request_t *req, dict_t *dict)
ret = -1;
}
}
- rcu_read_unlock();
+ RCU_READ_UNLOCK;
if (ret) {
gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_HANDSHAKE_REQ_REJECTED,
"Rejecting management "
@@ -1263,6 +1388,7 @@ __glusterd_mgmt_hndsk_versions(rpcsvc_request_t *req)
if (ret < 0) {
// failed to decode msg;
req->rpc_err = GARBAGE_ARGS;
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_GARBAGE_ARGS, NULL);
goto out;
}
@@ -1276,8 +1402,10 @@ __glusterd_mgmt_hndsk_versions(rpcsvc_request_t *req)
}
dict = dict_new();
- if (!dict)
+ if (!dict) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL);
goto out;
+ }
ret = dict_set_int32(dict, GD_OP_VERSION_KEY, conf->op_version);
if (ret) {
@@ -1363,6 +1491,7 @@ __glusterd_mgmt_hndsk_versions_ack(rpcsvc_request_t *req)
if (ret < 0) {
// failed to decode msg;
req->rpc_err = GARBAGE_ARGS;
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_GARBAGE_ARGS, NULL);
goto out;
}
@@ -1435,22 +1564,25 @@ __server_get_volume_info(rpcsvc_request_t *req)
char *volume_id_str = NULL;
int32_t flags = 0;
+ xlator_t *this = THIS;
+ GF_ASSERT(this);
+
ret = xdr_to_generic(req->msg[0], &vol_info_req,
(xdrproc_t)xdr_gf_get_volume_info_req);
if (ret < 0) {
/* failed to decode msg */
req->rpc_err = GARBAGE_ARGS;
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_GARBAGE_ARGS, NULL);
goto out;
}
- gf_msg("glusterd", GF_LOG_INFO, 0, GD_MSG_VOL_INFO_REQ_RECVD,
- "Received get volume info req");
+ gf_smsg(this->name, GF_LOG_INFO, 0, GD_MSG_VOL_INFO_REQ_RECVD, NULL);
if (vol_info_req.dict.dict_len) {
/* Unserialize the dictionary */
dict = dict_new();
if (!dict) {
- gf_msg("glusterd", GF_LOG_WARNING, ENOMEM, GD_MSG_NO_MEMORY,
- "Out of Memory");
+ gf_smsg(this->name, GF_LOG_WARNING, ENOMEM, GD_MSG_DICT_CREATE_FAIL,
+ NULL);
op_errno = ENOMEM;
ret = -1;
goto out;
@@ -1459,9 +1591,8 @@ __server_get_volume_info(rpcsvc_request_t *req)
ret = dict_unserialize(vol_info_req.dict.dict_val,
vol_info_req.dict.dict_len, &dict);
if (ret < 0) {
- gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_DICT_UNSERIALIZE_FAIL,
- "failed to "
- "unserialize req-buffer to dictionary");
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_UNSERIALIZE_FAIL,
+ NULL);
op_errno = -ret;
ret = -1;
goto out;
@@ -1472,8 +1603,8 @@ __server_get_volume_info(rpcsvc_request_t *req)
ret = dict_get_int32(dict, "flags", &flags);
if (ret) {
- gf_msg(THIS->name, GF_LOG_ERROR, -ret, GD_MSG_DICT_GET_FAILED,
- "failed to get flags");
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
+ "Key=flags", NULL);
op_errno = -ret;
ret = -1;
goto out;
@@ -1481,13 +1612,15 @@ __server_get_volume_info(rpcsvc_request_t *req)
if (!flags) {
/* Nothing to query about. Just return success */
- gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_NO_FLAG_SET, "No flags set");
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_NO_FLAG_SET, NULL);
ret = 0;
goto out;
}
ret = dict_get_str(dict, "volname", &volname);
if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
+ "Key=volname", NULL);
op_errno = EINVAL;
ret = -1;
goto out;
@@ -1495,6 +1628,8 @@ __server_get_volume_info(rpcsvc_request_t *req)
ret = glusterd_volinfo_find(volname, &volinfo);
if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_VOLINFO_GET_FAIL,
+ "Volname=%s", volname, NULL);
op_errno = EINVAL;
ret = -1;
goto out;
@@ -1503,6 +1638,8 @@ __server_get_volume_info(rpcsvc_request_t *req)
if (flags & (int32_t)GF_GET_VOLUME_UUID) {
volume_id_str = gf_strdup(uuid_utoa(volinfo->volume_id));
if (!volume_id_str) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_STRDUP_FAILED,
+ NULL);
op_errno = ENOMEM;
ret = -1;
goto out;
@@ -1510,8 +1647,8 @@ __server_get_volume_info(rpcsvc_request_t *req)
dict_rsp = dict_new();
if (!dict_rsp) {
- gf_msg("glusterd", GF_LOG_WARNING, ENOMEM, GD_MSG_NO_MEMORY,
- "Out of Memory");
+ gf_smsg(this->name, GF_LOG_WARNING, ENOMEM, GD_MSG_DICT_CREATE_FAIL,
+ NULL);
op_errno = ENOMEM;
GF_FREE(volume_id_str);
ret = -1;
@@ -1519,6 +1656,8 @@ __server_get_volume_info(rpcsvc_request_t *req)
}
ret = dict_set_dynstr(dict_rsp, "volume_id", volume_id_str);
if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=volume_id", NULL);
op_errno = -ret;
ret = -1;
goto out;
@@ -1527,6 +1666,8 @@ __server_get_volume_info(rpcsvc_request_t *req)
ret = dict_allocate_and_serialize(dict_rsp, &vol_info_rsp.dict.dict_val,
&vol_info_rsp.dict.dict_len);
if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ GD_MSG_DICT_ALLOC_AND_SERL_LENGTH_GET_FAIL, NULL);
op_errno = -ret;
ret = -1;
goto out;
@@ -1592,6 +1733,8 @@ __server_get_snap_info(rpcsvc_request_t *req)
if (snap_info_req.dict.dict_len) {
dict = dict_new();
if (!dict) {
+ gf_smsg("glusterd", GF_LOG_WARNING, ENOMEM, GD_MSG_DICT_CREATE_FAIL,
+ NULL);
op_errno = ENOMEM;
ret = -1;
goto out;
@@ -1622,6 +1765,8 @@ __server_get_snap_info(rpcsvc_request_t *req)
dict_rsp = dict_new();
if (!dict_rsp) {
+ gf_smsg("glusterd", GF_LOG_WARNING, ENOMEM, GD_MSG_DICT_CREATE_FAIL,
+ NULL);
op_errno = ENOMEM;
ret = -1;
goto out;
@@ -1664,16 +1809,16 @@ server_get_snap_info(rpcsvc_request_t *req)
return glusterd_big_locked_handler(req, __server_get_snap_info);
}
-rpcsvc_actor_t gluster_handshake_actors[GF_HNDSK_MAXVALUE] = {
- [GF_HNDSK_NULL] = {"NULL", GF_HNDSK_NULL, NULL, NULL, 0, DRC_NA},
- [GF_HNDSK_GETSPEC] = {"GETSPEC", GF_HNDSK_GETSPEC, server_getspec, NULL, 0,
- DRC_NA},
- [GF_HNDSK_EVENT_NOTIFY] = {"EVENTNOTIFY", GF_HNDSK_EVENT_NOTIFY,
- server_event_notify, NULL, 0, DRC_NA},
- [GF_HNDSK_GET_VOLUME_INFO] = {"GETVOLUMEINFO", GF_HNDSK_GET_VOLUME_INFO,
- server_get_volume_info, NULL, 0, DRC_NA},
- [GF_HNDSK_GET_SNAPSHOT_INFO] = {"GETSNAPINFO", GF_HNDSK_GET_SNAPSHOT_INFO,
- server_get_snap_info, NULL, 0, DRC_NA},
+static rpcsvc_actor_t gluster_handshake_actors[GF_HNDSK_MAXVALUE] = {
+ [GF_HNDSK_NULL] = {"NULL", NULL, NULL, GF_HNDSK_NULL, DRC_NA, 0},
+ [GF_HNDSK_GETSPEC] = {"GETSPEC", server_getspec, NULL, GF_HNDSK_GETSPEC,
+ DRC_NA, 0},
+ [GF_HNDSK_EVENT_NOTIFY] = {"EVENTNOTIFY", server_event_notify, NULL,
+ GF_HNDSK_EVENT_NOTIFY, DRC_NA, 0},
+ [GF_HNDSK_GET_VOLUME_INFO] = {"GETVOLUMEINFO", server_get_volume_info, NULL,
+ GF_HNDSK_GET_VOLUME_INFO, DRC_NA, 0},
+ [GF_HNDSK_GET_SNAPSHOT_INFO] = {"GETSNAPINFO", server_get_snap_info, NULL,
+ GF_HNDSK_GET_SNAPSHOT_INFO, DRC_NA, 0},
};
struct rpcsvc_program gluster_handshake_prog = {
@@ -1685,9 +1830,9 @@ struct rpcsvc_program gluster_handshake_prog = {
};
/* A minimal RPC program just for the cli getspec command */
-rpcsvc_actor_t gluster_cli_getspec_actors[GF_HNDSK_MAXVALUE] = {
- [GF_HNDSK_GETSPEC] = {"GETSPEC", GF_HNDSK_GETSPEC, server_getspec, NULL, 0,
- DRC_NA},
+static rpcsvc_actor_t gluster_cli_getspec_actors[GF_HNDSK_MAXVALUE] = {
+ [GF_HNDSK_GETSPEC] = {"GETSPEC", server_getspec, NULL, GF_HNDSK_GETSPEC,
+ DRC_NA, 0},
};
struct rpcsvc_program gluster_cli_getspec_prog = {
@@ -1698,26 +1843,26 @@ struct rpcsvc_program gluster_cli_getspec_prog = {
.numactors = GF_HNDSK_MAXVALUE,
};
-char *glusterd_dump_proc[GF_DUMP_MAXVALUE] = {
+static char *glusterd_dump_proc[GF_DUMP_MAXVALUE] = {
[GF_DUMP_NULL] = "NULL",
[GF_DUMP_DUMP] = "DUMP",
[GF_DUMP_PING] = "PING",
};
-rpc_clnt_prog_t glusterd_dump_prog = {
+static rpc_clnt_prog_t glusterd_dump_prog = {
.progname = "GLUSTERD-DUMP",
.prognum = GLUSTER_DUMP_PROGRAM,
.progver = GLUSTER_DUMP_VERSION,
.procnames = glusterd_dump_proc,
};
-rpcsvc_actor_t glusterd_mgmt_hndsk_actors[GD_MGMT_HNDSK_MAXVALUE] = {
- [GD_MGMT_HNDSK_NULL] = {"NULL", GD_MGMT_HNDSK_NULL, NULL, NULL, 0, DRC_NA},
- [GD_MGMT_HNDSK_VERSIONS] = {"MGMT-VERS", GD_MGMT_HNDSK_VERSIONS,
- glusterd_mgmt_hndsk_versions, NULL, 0, DRC_NA},
- [GD_MGMT_HNDSK_VERSIONS_ACK] = {"MGMT-VERS-ACK", GD_MGMT_HNDSK_VERSIONS_ACK,
- glusterd_mgmt_hndsk_versions_ack, NULL, 0,
- DRC_NA},
+static rpcsvc_actor_t glusterd_mgmt_hndsk_actors[GD_MGMT_HNDSK_MAXVALUE] = {
+ [GD_MGMT_HNDSK_NULL] = {"NULL", NULL, NULL, GD_MGMT_HNDSK_NULL, DRC_NA, 0},
+ [GD_MGMT_HNDSK_VERSIONS] = {"MGMT-VERS", glusterd_mgmt_hndsk_versions, NULL,
+ GD_MGMT_HNDSK_VERSIONS, DRC_NA, 0},
+ [GD_MGMT_HNDSK_VERSIONS_ACK] = {"MGMT-VERS-ACK",
+ glusterd_mgmt_hndsk_versions_ack, NULL,
+ GD_MGMT_HNDSK_VERSIONS_ACK, DRC_NA, 0},
};
struct rpcsvc_program glusterd_mgmt_hndsk_prog = {
@@ -1728,13 +1873,13 @@ struct rpcsvc_program glusterd_mgmt_hndsk_prog = {
.numactors = GD_MGMT_HNDSK_MAXVALUE,
};
-char *glusterd_mgmt_hndsk_proc[GD_MGMT_HNDSK_MAXVALUE] = {
+static char *glusterd_mgmt_hndsk_proc[GD_MGMT_HNDSK_MAXVALUE] = {
[GD_MGMT_HNDSK_NULL] = "NULL",
[GD_MGMT_HNDSK_VERSIONS] = "MGMT-VERS",
[GD_MGMT_HNDSK_VERSIONS_ACK] = "MGMT-VERS-ACK",
};
-rpc_clnt_prog_t gd_clnt_mgmt_hndsk_prog = {
+static rpc_clnt_prog_t gd_clnt_mgmt_hndsk_prog = {
.progname = "Gluster MGMT Handshake",
.prognum = GD_MGMT_HNDSK_PROGRAM,
.progver = GD_MGMT_HNDSK_VERSION,
@@ -1768,16 +1913,17 @@ glusterd_event_connected_inject(glusterd_peerctx_t *peerctx)
goto out;
}
- rcu_read_lock();
+ RCU_READ_LOCK;
peerinfo = glusterd_peerinfo_find_by_generation(peerctx->peerinfo_gen);
if (!peerinfo) {
+ RCU_READ_UNLOCK;
ret = -1;
gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_PEER_NOT_FOUND,
"Could not find peer %s(%s)", peerctx->peername,
uuid_utoa(peerctx->peerid));
GF_FREE(ctx);
- goto unlock;
+ goto out;
}
ctx->hostname = gf_strdup(peerinfo->hostname);
ctx->port = peerinfo->port;
@@ -1790,13 +1936,13 @@ glusterd_event_connected_inject(glusterd_peerctx_t *peerctx)
ret = glusterd_friend_sm_inject_event(event);
+ RCU_READ_UNLOCK;
+
if (ret)
gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_EVENT_INJECT_FAIL,
"Unable to inject "
"EVENT_CONNECTED ret = %d",
ret);
-unlock:
- rcu_read_unlock();
out:
gf_msg_debug("glusterd", 0, "returning %d", ret);
@@ -1813,22 +1959,45 @@ gd_validate_peer_op_version(xlator_t *this, glusterd_peerinfo_t *peerinfo,
int32_t peer_min_op_version = 0;
int32_t peer_max_op_version = 0;
- if (!dict || !this || !peerinfo)
+ if (!dict) {
+ gf_smsg("glusterd", GF_LOG_WARNING, ENOMEM, GD_MSG_DICT_CREATE_FAIL,
+ NULL);
+ goto out;
+ }
+
+ if (!this) {
+ gf_smsg("glusterd", GF_LOG_ERROR, errno, GD_MSG_XLATOR_NOT_DEFINED,
+ NULL);
+ goto out;
+ }
+
+ if (!peerinfo) {
+ gf_smsg("glusterd", GF_LOG_ERROR, errno, GD_MSG_INVALID_ARGUMENT, NULL);
goto out;
+ }
conf = this->private;
ret = dict_get_int32(dict, GD_OP_VERSION_KEY, &peer_op_version);
- if (ret)
+ if (ret) {
+ gf_smsg("glusterd", GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
+ "Key=%s", GD_OP_VERSION_KEY, NULL);
goto out;
+ }
ret = dict_get_int32(dict, GD_MAX_OP_VERSION_KEY, &peer_max_op_version);
- if (ret)
+ if (ret) {
+ gf_smsg("glusterd", GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
+ "Key=%s", GD_MAX_OP_VERSION_KEY, NULL);
goto out;
+ }
ret = dict_get_int32(dict, GD_MIN_OP_VERSION_KEY, &peer_min_op_version);
- if (ret)
+ if (ret) {
+ gf_smsg("glusterd", GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
+ "Key=%s", GD_MIN_OP_VERSION_KEY, NULL);
goto out;
+ }
ret = -1;
/* Check if peer can support our op_version */
@@ -1870,7 +2039,7 @@ __glusterd_mgmt_hndsk_version_ack_cbk(struct rpc_req *req, struct iovec *iov,
frame = myframe;
peerctx = frame->local;
- rcu_read_lock();
+ RCU_READ_LOCK;
peerinfo = glusterd_peerinfo_find_by_generation(peerctx->peerinfo_gen);
if (!peerinfo) {
gf_msg_debug(this->name, 0, "Could not find peer %s(%s)",
@@ -1930,7 +2099,7 @@ out:
if (ret != 0 && peerinfo)
rpc_transport_disconnect(peerinfo->rpc->conn.trans, _gf_false);
- rcu_read_unlock();
+ RCU_READ_UNLOCK;
frame->local = NULL;
STACK_DESTROY(frame->root);
@@ -1979,7 +2148,7 @@ __glusterd_mgmt_hndsk_version_cbk(struct rpc_req *req, struct iovec *iov,
frame = myframe;
peerctx = frame->local;
- rcu_read_lock();
+ RCU_READ_LOCK;
peerinfo = glusterd_peerinfo_find_by_generation(peerctx->peerinfo_gen);
if (!peerinfo) {
@@ -2055,7 +2224,7 @@ out:
rpc_transport_disconnect(peerinfo->rpc->conn.trans, _gf_false);
}
- rcu_read_unlock();
+ RCU_READ_UNLOCK;
if (rsp.hndsk.hndsk_val)
free(rsp.hndsk.hndsk_val);
@@ -2094,14 +2263,20 @@ glusterd_mgmt_handshake(xlator_t *this, glusterd_peerctx_t *peerctx)
int ret = -1;
frame = create_frame(this, this->ctx->pool);
- if (!frame)
+ if (!frame) {
+ gf_smsg("glusterd", GF_LOG_WARNING, errno, GD_MSG_FRAME_CREATE_FAIL,
+ NULL);
goto out;
+ }
frame->local = peerctx;
req_dict = dict_new();
- if (!req_dict)
+ if (!req_dict) {
+ gf_smsg("glusterd", GF_LOG_WARNING, ENOMEM, GD_MSG_DICT_CREATE_FAIL,
+ NULL);
goto out;
+ }
ret = dict_set_dynstr(req_dict, GD_PEER_ID_KEY,
gf_strdup(uuid_utoa(MY_UUID)));
@@ -2114,23 +2289,29 @@ glusterd_mgmt_handshake(xlator_t *this, glusterd_peerctx_t *peerctx)
GF_PROTOCOL_DICT_SERIALIZE(this, req_dict, (&req.hndsk.hndsk_val),
req.hndsk.hndsk_len, ret, out);
- rcu_read_lock();
+ RCU_READ_LOCK;
peerinfo = glusterd_peerinfo_find_by_generation(peerctx->peerinfo_gen);
if (!peerinfo) {
+ RCU_READ_UNLOCK;
gf_msg_debug(THIS->name, 0, "Could not find peer %s(%s)",
peerctx->peername, uuid_utoa(peerctx->peerid));
- goto unlock;
+ goto out;
}
ret = glusterd_submit_request(
peerinfo->rpc, &req, frame, &gd_clnt_mgmt_hndsk_prog,
GD_MGMT_HNDSK_VERSIONS, NULL, this, glusterd_mgmt_hndsk_version_cbk,
(xdrproc_t)xdr_gf_mgmt_hndsk_req);
+
+ RCU_READ_UNLOCK;
+
ret = 0;
-unlock:
- rcu_read_unlock();
+
out:
+ if (req_dict)
+ dict_unref(req_dict);
+
if (ret && frame)
STACK_DESTROY(frame->root);
@@ -2244,7 +2425,7 @@ __glusterd_peer_dump_version_cbk(struct rpc_req *req, struct iovec *iov,
frame = myframe;
peerctx = frame->local;
- rcu_read_lock();
+ RCU_READ_LOCK;
peerinfo = glusterd_peerinfo_find_by_generation(peerctx->peerinfo_gen);
if (!peerinfo) {
@@ -2320,7 +2501,7 @@ out:
if (ret != 0 && peerinfo)
rpc_transport_disconnect(peerinfo->rpc->conn.trans, _gf_false);
- rcu_read_unlock();
+ RCU_READ_UNLOCK;
glusterd_friend_sm();
glusterd_op_sm();
@@ -2362,20 +2543,26 @@ glusterd_peer_dump_version(xlator_t *this, struct rpc_clnt *rpc,
int ret = -1;
frame = create_frame(this, this->ctx->pool);
- if (!frame)
+ if (!frame) {
+ gf_smsg(this->name, GF_LOG_WARNING, errno, GD_MSG_FRAME_CREATE_FAIL,
+ NULL);
goto out;
+ }
frame->local = peerctx;
- if (!peerctx)
+ if (!peerctx) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_INVALID_ARGUMENT, NULL);
goto out;
+ }
- rcu_read_lock();
+ RCU_READ_LOCK;
peerinfo = glusterd_peerinfo_find_by_generation(peerctx->peerinfo_gen);
if (!peerinfo) {
+ RCU_READ_UNLOCK;
gf_msg_debug(this->name, 0, "Couldn't find peer %s(%s)",
peerctx->peername, uuid_utoa(peerctx->peerid));
- goto unlock;
+ goto out;
}
req.gfs_id = 0xcafe;
@@ -2383,8 +2570,8 @@ glusterd_peer_dump_version(xlator_t *this, struct rpc_clnt *rpc,
ret = glusterd_submit_request(
peerinfo->rpc, &req, frame, &glusterd_dump_prog, GF_DUMP_DUMP, NULL,
this, glusterd_peer_dump_version_cbk, (xdrproc_t)xdr_gf_dump_req);
-unlock:
- rcu_read_unlock();
+
+ RCU_READ_UNLOCK;
out:
if (ret && frame)
STACK_DESTROY(frame->root);
diff --git a/xlators/mgmt/glusterd/src/glusterd-hooks.c b/xlators/mgmt/glusterd/src/glusterd-hooks.c
index 4a482d5cfb7..61c0f1c946f 100644
--- a/xlators/mgmt/glusterd/src/glusterd-hooks.c
+++ b/xlators/mgmt/glusterd/src/glusterd-hooks.c
@@ -8,15 +8,15 @@
cases as published by the Free Software Foundation.
*/
-#include "glusterfs.h"
-#include "dict.h"
-#include "xlator.h"
-#include "logging.h"
-#include "run.h"
-#include "defaults.h"
-#include "syscall.h"
-#include "compat.h"
-#include "compat-errno.h"
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/dict.h>
+#include <glusterfs/xlator.h>
+#include <glusterfs/logging.h>
+#include <glusterfs/run.h>
+#include <glusterfs/defaults.h>
+#include <glusterfs/syscall.h>
+#include <glusterfs/compat.h>
+#include <glusterfs/compat-errno.h>
#include "glusterd.h"
#include "glusterd-sm.h"
#include "glusterd-op-sm.h"
@@ -87,21 +87,24 @@ glusterd_hooks_create_hooks_directory(char *basedir)
glusterd_conf_t *priv = NULL;
int32_t len = 0;
- priv = THIS->private;
+ xlator_t *this = NULL;
+ this = THIS;
+ GF_ASSERT(this);
+ priv = this->private;
snprintf(path, sizeof(path), "%s/hooks", basedir);
- ret = mkdir_p(path, 0777, _gf_true);
+ ret = mkdir_p(path, 0755, _gf_true);
if (ret) {
- gf_msg(THIS->name, GF_LOG_CRITICAL, errno, GD_MSG_CREATE_DIR_FAILED,
- "Unable to create %s", path);
+ gf_smsg(this->name, GF_LOG_CRITICAL, errno, GD_MSG_CREATE_DIR_FAILED,
+ "Path=%s", path, NULL);
goto out;
}
GLUSTERD_GET_HOOKS_DIR(version_dir, GLUSTERD_HOOK_VER, priv);
- ret = mkdir_p(version_dir, 0777, _gf_true);
+ ret = mkdir_p(version_dir, 0755, _gf_true);
if (ret) {
- gf_msg(THIS->name, GF_LOG_CRITICAL, errno, GD_MSG_CREATE_DIR_FAILED,
- "Unable to create %s", version_dir);
+ gf_smsg(this->name, GF_LOG_CRITICAL, errno, GD_MSG_CREATE_DIR_FAILED,
+ "Directory=%s", version_dir, NULL);
goto out;
}
@@ -112,13 +115,14 @@ glusterd_hooks_create_hooks_directory(char *basedir)
len = snprintf(path, sizeof(path), "%s/%s", version_dir, cmd_subdir);
if ((len < 0) || (len >= sizeof(path))) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_COPY_FAIL, NULL);
ret = -1;
goto out;
}
- ret = mkdir_p(path, 0777, _gf_true);
+ ret = mkdir_p(path, 0755, _gf_true);
if (ret) {
- gf_msg(THIS->name, GF_LOG_CRITICAL, errno, GD_MSG_CREATE_DIR_FAILED,
- "Unable to create %s", path);
+ gf_smsg(this->name, GF_LOG_CRITICAL, errno,
+ GD_MSG_CREATE_DIR_FAILED, "Path=%s", path, NULL);
goto out;
}
@@ -126,13 +130,15 @@ glusterd_hooks_create_hooks_directory(char *basedir)
len = snprintf(path, sizeof(path), "%s/%s/%s", version_dir,
cmd_subdir, type_subdir[type]);
if ((len < 0) || (len >= sizeof(path))) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_COPY_FAIL,
+ NULL);
ret = -1;
goto out;
}
- ret = mkdir_p(path, 0777, _gf_true);
+ ret = mkdir_p(path, 0755, _gf_true);
if (ret) {
- gf_msg(THIS->name, GF_LOG_CRITICAL, errno,
- GD_MSG_CREATE_DIR_FAILED, "Unable to create %s", path);
+ gf_smsg(this->name, GF_LOG_CRITICAL, errno,
+ GD_MSG_CREATE_DIR_FAILED, "Path=%s", path, NULL);
goto out;
}
}
@@ -200,20 +206,31 @@ glusterd_hooks_set_volume_args(dict_t *dict, runner_t *runner)
int i = 0;
int count = 0;
int ret = -1;
+ int flag = 0;
char query[1024] = {
0,
};
char *key = NULL;
char *value = NULL;
+ char *inet_family = NULL;
+ xlator_t *this = NULL;
+ this = THIS;
+ GF_ASSERT(this);
ret = dict_get_int32(dict, "count", &count);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
+ "Key=count", NULL);
goto out;
+ }
/* This will not happen unless op_ctx
* is corrupted*/
- if (!count)
+ if (!count) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_INVALID_ENTRY, "count",
+ NULL);
goto out;
+ }
runner_add_arg(runner, "-o");
for (i = 1; ret == 0; i++) {
@@ -228,9 +245,23 @@ glusterd_hooks_set_volume_args(dict_t *dict, runner_t *runner)
continue;
runner_argprintf(runner, "%s=%s", key, value);
+ if ((strncmp(key, "cluster.enable-shared-storage",
+ SLEN("cluster.enable-shared-storage")) == 0 ||
+ strncmp(key, "enable-shared-storage",
+ SLEN("enable-shared-storage")) == 0) &&
+ strncmp(value, "enable", SLEN("enable")) == 0)
+ flag = 1;
}
glusterd_hooks_add_custom_args(dict, runner);
+ if (flag == 1) {
+ ret = dict_get_str_sizen(this->options, "transport.address-family",
+ &inet_family);
+ if (!ret) {
+ runner_argprintf(runner, "transport.address-family=%s",
+ inet_family);
+ }
+ }
ret = 0;
out:
@@ -357,27 +388,31 @@ glusterd_hooks_run_hooks(char *hooks_path, glusterd_op_t op, dict_t *op_ctx,
lines = GF_CALLOC(1, N * sizeof(*lines), gf_gld_mt_charptr);
if (!lines) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_NO_MEMORY, NULL);
ret = -1;
goto out;
}
ret = -1;
line_count = 0;
- GF_SKIP_IRRELEVANT_ENTRIES(entry, hookdir, scratch);
- while (entry) {
+
+ while ((entry = sys_readdir(hookdir, scratch))) {
+ if (gf_irrelevant_entry(entry))
+ continue;
if (line_count == N - 1) {
N *= 2;
lines = GF_REALLOC(lines, N * sizeof(char *));
- if (!lines)
+ if (!lines) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_NO_MEMORY,
+ NULL);
goto out;
+ }
}
if (glusterd_is_hook_enabled(entry->d_name)) {
lines[line_count] = gf_strdup(entry->d_name);
line_count++;
}
-
- GF_SKIP_IRRELEVANT_ENTRIES(entry, hookdir, scratch);
}
lines[line_count] = NULL;
@@ -461,31 +496,40 @@ glusterd_hooks_stub_init(glusterd_hooks_stub_t **stub, char *scriptdir,
int ret = -1;
glusterd_hooks_stub_t *hooks_stub = NULL;
+ xlator_t *this = NULL;
+ this = THIS;
+ GF_ASSERT(this);
GF_ASSERT(stub);
if (!stub)
goto out;
hooks_stub = GF_CALLOC(1, sizeof(*hooks_stub), gf_gld_mt_hooks_stub_t);
- if (!hooks_stub)
+ if (!hooks_stub) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_NO_MEMORY, NULL);
goto out;
+ }
CDS_INIT_LIST_HEAD(&hooks_stub->all_hooks);
hooks_stub->op = op;
hooks_stub->scriptdir = gf_strdup(scriptdir);
- if (!hooks_stub->scriptdir)
+ if (!hooks_stub->scriptdir) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_STRDUP_FAILED,
+ "scriptdir=%s", scriptdir, NULL);
goto out;
+ }
hooks_stub->op_ctx = dict_copy_with_ref(op_ctx, hooks_stub->op_ctx);
- if (!hooks_stub->op_ctx)
+ if (!hooks_stub->op_ctx) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_COPY_FAIL, NULL);
goto out;
+ }
*stub = hooks_stub;
ret = 0;
out:
if (ret) {
- gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_POST_HOOK_STUB_INIT_FAIL,
- "Failed to initialize "
- "post hooks stub");
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_POST_HOOK_STUB_INIT_FAIL,
+ NULL);
glusterd_hooks_stub_cleanup(hooks_stub);
}
@@ -547,12 +591,20 @@ glusterd_hooks_priv_init(glusterd_hooks_private_t **new)
int ret = -1;
glusterd_hooks_private_t *hooks_priv = NULL;
- if (!new)
+ xlator_t *this = NULL;
+ this = THIS;
+ GF_ASSERT(this);
+
+ if (!new) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_INVALID_ARGUMENT, NULL);
goto out;
+ }
hooks_priv = GF_CALLOC(1, sizeof(*hooks_priv), gf_gld_mt_hooks_priv_t);
- if (!hooks_priv)
+ if (!hooks_priv) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_NO_MEMORY, NULL);
goto out;
+ }
pthread_mutex_init(&hooks_priv->mutex, NULL);
pthread_cond_init(&hooks_priv->cond, NULL);
diff --git a/xlators/mgmt/glusterd/src/glusterd-hooks.h b/xlators/mgmt/glusterd/src/glusterd-hooks.h
index 3813c18e989..f8b887b9bd7 100644
--- a/xlators/mgmt/glusterd/src/glusterd-hooks.h
+++ b/xlators/mgmt/glusterd/src/glusterd-hooks.h
@@ -34,17 +34,17 @@ typedef enum glusterd_commit_hook_type {
typedef struct hooks_private {
struct cds_list_head list;
- int waitcount; // debug purposes
pthread_mutex_t mutex;
pthread_cond_t cond;
pthread_t worker;
+ int waitcount; // debug purposes
} glusterd_hooks_private_t;
typedef struct hooks_stub {
struct cds_list_head all_hooks;
char *scriptdir;
- glusterd_op_t op;
dict_t *op_ctx;
+ glusterd_op_t op;
} glusterd_hooks_stub_t;
diff --git a/xlators/mgmt/glusterd/src/glusterd-locks.c b/xlators/mgmt/glusterd/src/glusterd-locks.c
index ad87c1df64f..11523f2854b 100644
--- a/xlators/mgmt/glusterd/src/glusterd-locks.c
+++ b/xlators/mgmt/glusterd/src/glusterd-locks.c
@@ -7,7 +7,7 @@
later), or the GNU General Public License, version 2 (GPLv2), in all
cases as published by the Free Software Foundation.
*/
-#include "common-utils.h"
+#include <glusterfs/common-utils.h>
#include "cli1-xdr.h"
#include "xdr-generic.h"
#include "glusterd.h"
@@ -17,8 +17,8 @@
#include "glusterd-volgen.h"
#include "glusterd-locks.h"
#include "glusterd-errno.h"
-#include "run.h"
-#include "syscall.h"
+#include <glusterfs/run.h>
+#include <glusterfs/syscall.h>
#include "glusterd-messages.h"
#include <signal.h>
@@ -36,22 +36,20 @@ glusterd_valid_entities valid_types[] = {
};
/* Checks if the lock request is for a valid entity */
-gf_boolean_t
+static gf_boolean_t
glusterd_mgmt_v3_is_type_valid(char *type)
{
- int32_t i = 0;
- gf_boolean_t ret = _gf_false;
+ int i = 0;
GF_ASSERT(type);
for (i = 0; valid_types[i].type; i++) {
if (!strcmp(type, valid_types[i].type)) {
- ret = _gf_true;
- break;
+ return _gf_true;
}
}
- return ret;
+ return _gf_false;
}
/* Initialize the global mgmt_v3 lock list(dict) when
@@ -138,15 +136,12 @@ out:
return;
}
-int32_t
+static int32_t
glusterd_get_mgmt_v3_lock_owner(char *key, uuid_t *uuid)
{
int32_t ret = -1;
glusterd_mgmt_v3_lock_obj *lock_obj = NULL;
glusterd_conf_t *priv = NULL;
- uuid_t no_owner = {
- 0,
- };
xlator_t *this = NULL;
this = THIS;
@@ -164,8 +159,6 @@ glusterd_get_mgmt_v3_lock_owner(char *key, uuid_t *uuid)
ret = dict_get_bin(priv->mgmt_v3_lock, key, (void **)&lock_obj);
if (!ret)
gf_uuid_copy(*uuid, lock_obj->lock_owner);
- else
- gf_uuid_copy(*uuid, no_owner);
ret = 0;
out:
@@ -199,11 +192,11 @@ glusterd_release_multiple_locks_per_entity(dict_t *dict, uuid_t uuid,
/* Release all the locks held */
for (i = 0; i < locked_count; i++) {
- snprintf(name_buf, sizeof(name_buf), "%sname%d", type, i + 1);
+ ret = snprintf(name_buf, sizeof(name_buf), "%sname%d", type, i + 1);
/* Looking for volname1, volname2 or snapname1, snapname2 *
* as key in the dict */
- ret = dict_get_str(dict, name_buf, &name);
+ ret = dict_get_strn(dict, name_buf, ret, &name);
if (ret) {
gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
"Unable to get %s locked_count = %d", name_buf,
@@ -248,11 +241,11 @@ glusterd_acquire_multiple_locks_per_entity(dict_t *dict, uuid_t uuid,
/* Locking one element after other */
for (i = 0; i < count; i++) {
- snprintf(name_buf, sizeof(name_buf), "%sname%d", type, i + 1);
+ ret = snprintf(name_buf, sizeof(name_buf), "%sname%d", type, i + 1);
/* Looking for volname1, volname2 or snapname1, snapname2 *
* as key in the dict */
- ret = dict_get_str(dict, name_buf, &name);
+ ret = dict_get_strn(dict, name_buf, ret, &name);
if (ret) {
gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
"Unable to get %s count = %d", name_buf, count);
@@ -321,13 +314,13 @@ glusterd_mgmt_v3_unlock_entity(dict_t *dict, uuid_t uuid, char *type,
}
/* Looking for volcount or snapcount in the dict */
- snprintf(name_buf, sizeof(name_buf), "%scount", type);
- ret = dict_get_int32(dict, name_buf, &count);
+ ret = snprintf(name_buf, sizeof(name_buf), "%scount", type);
+ ret = dict_get_int32n(dict, name_buf, ret, &count);
if (ret) {
/* count is not present. Only one *
* element name needs to be unlocked */
- snprintf(name_buf, sizeof(name_buf), "%sname", type);
- ret = dict_get_str(dict, name_buf, &name);
+ ret = snprintf(name_buf, sizeof(name_buf), "%sname", type);
+ ret = dict_get_strn(dict, name_buf, ret, &name);
if (ret) {
gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
"Unable to fetch %sname", type);
@@ -390,13 +383,13 @@ glusterd_mgmt_v3_lock_entity(dict_t *dict, uuid_t uuid, uint32_t *op_errno,
}
/* Looking for volcount or snapcount in the dict */
- snprintf(name_buf, sizeof(name_buf), "%scount", type);
- ret = dict_get_int32(dict, name_buf, &count);
+ ret = snprintf(name_buf, sizeof(name_buf), "%scount", type);
+ ret = dict_get_int32n(dict, name_buf, ret, &count);
if (ret) {
/* count is not present. Only one *
* element name needs to be locked */
- snprintf(name_buf, sizeof(name_buf), "%sname", type);
- ret = dict_get_str(dict, name_buf, &name);
+ ret = snprintf(name_buf, sizeof(name_buf), "%sname", type);
+ ret = dict_get_strn(dict, name_buf, ret, &name);
if (ret) {
gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
"Unable to fetch %sname", type);
@@ -569,8 +562,8 @@ glusterd_mgmt_v3_lock(const char *name, uuid_t uuid, uint32_t *op_errno,
goto out;
}
- gf_msg_debug(this->name, 0, "Trying to acquire lock of %s %s for %s as %s",
- type, name, uuid_utoa(uuid), key);
+ gf_msg_debug(this->name, 0, "Trying to acquire lock of %s for %s", key,
+ uuid_utoa(uuid));
ret = glusterd_get_mgmt_v3_lock_owner(key, &owner);
if (ret) {
@@ -589,7 +582,7 @@ glusterd_mgmt_v3_lock(const char *name, uuid_t uuid, uint32_t *op_errno,
goto out;
}
- lock_obj = GF_CALLOC(1, sizeof(glusterd_mgmt_v3_lock_obj),
+ lock_obj = GF_MALLOC(sizeof(glusterd_mgmt_v3_lock_obj),
gf_common_mt_mgmt_v3_lock_obj_t);
if (!lock_obj) {
ret = -1;
@@ -616,9 +609,6 @@ glusterd_mgmt_v3_lock(const char *name, uuid_t uuid, uint32_t *op_errno,
}
mgmt_lock_timer->xl = THIS;
- key_dup = gf_strdup(key);
- delay.tv_sec = priv->mgmt_v3_lock_timeout;
- delay.tv_nsec = 0;
/*changing to default timeout value*/
priv->mgmt_v3_lock_timeout = GF_LOCK_TIMER;
@@ -626,17 +616,19 @@ glusterd_mgmt_v3_lock(const char *name, uuid_t uuid, uint32_t *op_errno,
mgmt_lock_timer_xl = mgmt_lock_timer->xl;
if (!mgmt_lock_timer_xl) {
GF_FREE(mgmt_lock_timer);
- GF_FREE(key_dup);
goto out;
}
mgmt_lock_timer_ctx = mgmt_lock_timer_xl->ctx;
if (!mgmt_lock_timer_ctx) {
GF_FREE(mgmt_lock_timer);
- GF_FREE(key_dup);
goto out;
}
+ key_dup = gf_strdup(key);
+ delay.tv_sec = priv->mgmt_v3_lock_timeout;
+ delay.tv_nsec = 0;
+
mgmt_lock_timer->timer = gf_timer_call_after(
mgmt_lock_timer_ctx, delay, gd_mgmt_v3_unlock_timer_cbk, key_dup);
@@ -652,18 +644,18 @@ glusterd_mgmt_v3_lock(const char *name, uuid_t uuid, uint32_t *op_errno,
/* Saving the backtrace into the pre-allocated buffer, ctx->btbuf*/
if ((bt = gf_backtrace_save(NULL))) {
- snprintf(key, sizeof(key), "debug.last-success-bt-%s-%s", name, type);
+ snprintf(key, sizeof(key), "debug.last-success-bt-%s", key_dup);
ret = dict_set_dynstr_with_alloc(priv->mgmt_v3_lock, key, bt);
if (ret)
gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_DICT_SET_FAILED,
"Failed to save "
- "the back trace for lock %s-%s granted to %s",
- name, type, uuid_utoa(uuid));
+ "the back trace for lock %s granted to %s",
+ key_dup, uuid_utoa(uuid));
ret = 0;
}
- gf_msg_debug(this->name, 0, "Lock for %s %s successfully held by %s", type,
- name, uuid_utoa(uuid));
+ gf_msg_debug(this->name, 0, "Lock for %s successfully held by %s", key_dup,
+ uuid_utoa(uuid));
ret = 0;
out:
@@ -681,9 +673,9 @@ gd_mgmt_v3_unlock_timer_cbk(void *data)
glusterd_conf_t *conf = NULL;
glusterd_mgmt_v3_lock_timer *mgmt_lock_timer = NULL;
char *key = NULL;
- char *type = NULL;
+ int keylen;
char bt_key[PATH_MAX] = "";
- char name[PATH_MAX] = "";
+ int bt_key_len = 0;
int32_t ret = -1;
glusterfs_ctx_t *mgmt_lock_timer_ctx = NULL;
xlator_t *mgmt_lock_timer_xl = NULL;
@@ -698,21 +690,18 @@ gd_mgmt_v3_unlock_timer_cbk(void *data)
GF_ASSERT(NULL != data);
key = (char *)data;
- dict_del(conf->mgmt_v3_lock, key);
-
- type = strrchr(key, '_');
- strncpy(name, key, strlen(key) - strlen(type) - 1);
+ keylen = strlen(key);
+ dict_deln(conf->mgmt_v3_lock, key, keylen);
- ret = snprintf(bt_key, PATH_MAX, "debug.last-success-bt-%s-%s", name,
- type + 1);
- if (ret != SLEN("debug.last-success-bt-") + strlen(name) + strlen(type)) {
+ bt_key_len = snprintf(bt_key, PATH_MAX, "debug.last-success-bt-%s", key);
+ if (bt_key_len != SLEN("debug.last-success-bt-") + keylen) {
gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_CREATE_KEY_FAIL,
"Unable to create backtrace "
"key");
goto out;
}
- dict_del(conf->mgmt_v3_lock, bt_key);
+ dict_deln(conf->mgmt_v3_lock, bt_key, bt_key_len);
ret = dict_get_bin(conf->mgmt_v3_lock_timer, key,
(void **)&mgmt_lock_timer);
@@ -732,12 +721,12 @@ out:
timer = mgmt_lock_timer->timer;
GF_FREE(timer->data);
gf_timer_call_cancel(mgmt_lock_timer_ctx, mgmt_lock_timer->timer);
- dict_del(conf->mgmt_v3_lock_timer, bt_key);
+ dict_deln(conf->mgmt_v3_lock_timer, bt_key, bt_key_len);
mgmt_lock_timer->timer = NULL;
gf_log(this->name, GF_LOG_INFO,
- "unlock timer is cancelled for volume"
+ "unlock timer is cancelled for volume_type"
" %s",
- name);
+ key);
}
ret_function:
@@ -750,6 +739,7 @@ glusterd_mgmt_v3_unlock(const char *name, uuid_t uuid, char *type)
{
char key[PATH_MAX] = "";
char key_dup[PATH_MAX] = "";
+ int keylen;
int32_t ret = -1;
gf_boolean_t is_valid = _gf_true;
glusterd_conf_t *priv = NULL;
@@ -784,14 +774,13 @@ glusterd_mgmt_v3_unlock(const char *name, uuid_t uuid, char *type)
goto out;
}
- ret = snprintf(key, sizeof(key), "%s_%s", name, type);
- if (ret != strlen(name) + 1 + strlen(type)) {
+ keylen = snprintf(key, sizeof(key), "%s_%s", name, type);
+ if (keylen != strlen(name) + 1 + strlen(type)) {
gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_CREATE_KEY_FAIL,
"Unable to create key");
ret = -1;
goto out;
}
- (void)snprintf(key_dup, sizeof(key_dup), "%s", key);
gf_msg_debug(this->name, 0, "Trying to release lock of %s %s for %s as %s",
type, name, uuid_utoa(uuid), key);
@@ -820,7 +809,7 @@ glusterd_mgmt_v3_unlock(const char *name, uuid_t uuid, char *type)
}
/* Removing the mgmt_v3 lock from the global list */
- dict_del(priv->mgmt_v3_lock, key);
+ dict_deln(priv->mgmt_v3_lock, key, keylen);
ret = dict_get_bin(priv->mgmt_v3_lock_timer, key,
(void **)&mgmt_lock_timer);
@@ -830,17 +819,18 @@ glusterd_mgmt_v3_unlock(const char *name, uuid_t uuid, char *type)
goto out;
}
+ (void)snprintf(key_dup, sizeof(key_dup), "%s", key);
+
/* Remove the backtrace key as well */
- ret = snprintf(key, sizeof(key), "debug.last-success-bt-%s-%s", name, type);
- if (ret !=
- SLEN("debug.last-success-bt-") + strlen(name) + strlen(type) + 1) {
+ ret = snprintf(key, sizeof(key), "debug.last-success-bt-%s", key_dup);
+ if (ret != SLEN("debug.last-success-bt-") + keylen) {
gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_CREATE_KEY_FAIL,
"Unable to create backtrace "
"key");
ret = -1;
goto out;
}
- dict_del(priv->mgmt_v3_lock, key);
+ dict_deln(priv->mgmt_v3_lock, key, ret);
gf_msg_debug(this->name, 0, "Lock for %s %s successfully released", type,
name);
@@ -858,7 +848,7 @@ glusterd_mgmt_v3_unlock(const char *name, uuid_t uuid, char *type)
timer = mgmt_lock_timer->timer;
GF_FREE(timer->data);
gf_timer_call_cancel(mgmt_lock_timer_ctx, mgmt_lock_timer->timer);
- dict_del(priv->mgmt_v3_lock_timer, key_dup);
+ dict_deln(priv->mgmt_v3_lock_timer, key_dup, keylen);
}
ret = glusterd_volinfo_find(name, &volinfo);
if (volinfo && volinfo->stage_deleted) {
@@ -869,8 +859,8 @@ glusterd_mgmt_v3_unlock(const char *name, uuid_t uuid, char *type)
volinfo->stage_deleted = _gf_false;
gf_log(this->name, GF_LOG_INFO,
"Volume %s still exist, setting "
- "stage deleted flag to false for the volume %s",
- volinfo->volname, volinfo->volname);
+ "stage deleted flag to false for the volume",
+ volinfo->volname);
}
ret = 0;
out:
diff --git a/xlators/mgmt/glusterd/src/glusterd-locks.h b/xlators/mgmt/glusterd/src/glusterd-locks.h
index 8878a30d0bf..44667cebd3d 100644
--- a/xlators/mgmt/glusterd/src/glusterd-locks.h
+++ b/xlators/mgmt/glusterd/src/glusterd-locks.h
@@ -40,9 +40,6 @@ void
glusterd_mgmt_v3_lock_timer_fini();
int32_t
-glusterd_get_mgmt_v3_lock_owner(char *volname, uuid_t *uuid);
-
-int32_t
glusterd_mgmt_v3_lock(const char *key, uuid_t uuid, uint32_t *op_errno,
char *type);
diff --git a/xlators/mgmt/glusterd/src/glusterd-log-ops.c b/xlators/mgmt/glusterd/src/glusterd-log-ops.c
index 4742225beb5..34abf35cb00 100644
--- a/xlators/mgmt/glusterd/src/glusterd-log-ops.c
+++ b/xlators/mgmt/glusterd/src/glusterd-log-ops.c
@@ -7,7 +7,7 @@
later), or the GNU General Public License, version 2 (GPLv2), in all
cases as published by the Free Software Foundation.
*/
-#include "common-utils.h"
+#include <glusterfs/common-utils.h>
#include "cli1-xdr.h"
#include "xdr-generic.h"
#include "glusterd.h"
@@ -16,7 +16,7 @@
#include "glusterd-utils.h"
#include "glusterd-volgen.h"
#include "glusterd-messages.h"
-#include "syscall.h"
+#include <glusterfs/syscall.h>
#include <signal.h>
@@ -43,6 +43,7 @@ __glusterd_handle_log_rotate(rpcsvc_request_t *req)
if (ret < 0) {
// failed to decode msg;
req->rpc_err = GARBAGE_ARGS;
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_GARBAGE_ARGS, NULL);
goto out;
}
@@ -75,7 +76,7 @@ __glusterd_handle_log_rotate(rpcsvc_request_t *req)
"for volume %s",
volname);
- ret = dict_set_uint64(dict, "rotate-key", (uint64_t)time(NULL));
+ ret = dict_set_uint64(dict, "rotate-key", (uint64_t)gf_time());
if (ret)
goto out;
@@ -105,7 +106,6 @@ glusterd_op_stage_log_rotate(dict_t *dict, char **op_errstr)
int ret = -1;
char *volname = NULL;
glusterd_volinfo_t *volinfo = NULL;
- gf_boolean_t exists = _gf_false;
char msg[2048] = {0};
char *brick = NULL;
@@ -116,13 +116,11 @@ glusterd_op_stage_log_rotate(dict_t *dict, char **op_errstr)
goto out;
}
- exists = glusterd_check_volume_exists(volname);
ret = glusterd_volinfo_find(volname, &volinfo);
- if (!exists) {
+ if (ret) {
snprintf(msg, sizeof(msg), "Volume %s does not exist", volname);
gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_VOL_NOT_FOUND, "%s", msg);
*op_errstr = gf_strdup(msg);
- ret = -1;
goto out;
}
@@ -141,6 +139,8 @@ glusterd_op_stage_log_rotate(dict_t *dict, char **op_errstr)
/* If no brick is specified, do log-rotate for
all the bricks in the volume */
if (ret) {
+ gf_smsg("glusterd", GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
+ "Key=brick", NULL);
ret = 0;
goto out;
}
@@ -207,8 +207,11 @@ glusterd_op_log_rotate(dict_t *dict)
ret = dict_get_str(dict, "brick", &brick);
/* If no brick is specified, do log-rotate for
all the bricks in the volume */
- if (ret)
+ if (ret) {
+ gf_smsg("glusterd", GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
+ "Key=brick", NULL);
goto cont;
+ }
ret = glusterd_brickinfo_new_from_brick(brick, &tmpbrkinfo, _gf_false,
NULL);
@@ -229,8 +232,9 @@ cont:
if (gf_uuid_compare(brickinfo->uuid, MY_UUID))
continue;
- if (brick && (strcmp(tmpbrkinfo->hostname, brickinfo->hostname) ||
- strcmp(tmpbrkinfo->path, brickinfo->path)))
+ if (tmpbrkinfo && brick &&
+ (strcmp(tmpbrkinfo->hostname, brickinfo->hostname) ||
+ strcmp(tmpbrkinfo->path, brickinfo->path)))
continue;
valid_brick = 1;
diff --git a/xlators/mgmt/glusterd/src/glusterd-mem-types.h b/xlators/mgmt/glusterd/src/glusterd-mem-types.h
index 210d0f8658c..d7257e1a7b5 100644
--- a/xlators/mgmt/glusterd/src/glusterd-mem-types.h
+++ b/xlators/mgmt/glusterd/src/glusterd-mem-types.h
@@ -11,67 +11,48 @@
#ifndef __GLUSTERD_MEM_TYPES_H__
#define __GLUSTERD_MEM_TYPES_H__
-#include "mem-types.h"
+#include <glusterfs/mem-types.h>
typedef enum gf_gld_mem_types_ {
- gf_gld_mt_dir_entry_t = gf_common_mt_end + 1,
- gf_gld_mt_volfile_ctx = gf_common_mt_end + 2,
- gf_gld_mt_glusterd_state_t = gf_common_mt_end + 3,
- gf_gld_mt_glusterd_conf_t = gf_common_mt_end + 4,
- gf_gld_mt_locker = gf_common_mt_end + 5,
- gf_gld_mt_string = gf_common_mt_end + 6,
- gf_gld_mt_lock_table = gf_common_mt_end + 7,
- gf_gld_mt_char = gf_common_mt_end + 8,
- gf_gld_mt_glusterd_connection_t = gf_common_mt_end + 9,
- gf_gld_mt_resolve_comp = gf_common_mt_end + 10,
- gf_gld_mt_peerinfo_t = gf_common_mt_end + 11,
- gf_gld_mt_friend_sm_event_t = gf_common_mt_end + 12,
- gf_gld_mt_friend_req_ctx_t = gf_common_mt_end + 13,
- gf_gld_mt_friend_update_ctx_t = gf_common_mt_end + 14,
- gf_gld_mt_op_sm_event_t = gf_common_mt_end + 15,
- gf_gld_mt_op_lock_ctx_t = gf_common_mt_end + 16,
- gf_gld_mt_op_stage_ctx_t = gf_common_mt_end + 17,
- gf_gld_mt_op_commit_ctx_t = gf_common_mt_end + 18,
- gf_gld_mt_mop_stage_req_t = gf_common_mt_end + 19,
- gf_gld_mt_probe_ctx_t = gf_common_mt_end + 20,
- gf_gld_mt_create_volume_ctx_t = gf_common_mt_end + 21,
- gf_gld_mt_start_volume_ctx_t = gf_common_mt_end + 22,
- gf_gld_mt_stop_volume_ctx_t = gf_common_mt_end + 23,
- gf_gld_mt_delete_volume_ctx_t = gf_common_mt_end + 24,
- gf_gld_mt_glusterd_volinfo_t = gf_common_mt_end + 25,
- gf_gld_mt_glusterd_brickinfo_t = gf_common_mt_end + 26,
- gf_gld_mt_peer_hostname_t = gf_common_mt_end + 27,
- gf_gld_mt_ifreq = gf_common_mt_end + 28,
- gf_gld_mt_store_handle_t = gf_common_mt_end + 29,
- gf_gld_mt_store_iter_t = gf_common_mt_end + 30,
- gf_gld_mt_defrag_info = gf_common_mt_end + 31,
- gf_gld_mt_log_filename_ctx_t = gf_common_mt_end + 32,
- gf_gld_mt_log_locate_ctx_t = gf_common_mt_end + 33,
- gf_gld_mt_log_rotate_ctx_t = gf_common_mt_end + 34,
- gf_gld_mt_peerctx_t = gf_common_mt_end + 35,
- gf_gld_mt_sm_tr_log_t = gf_common_mt_end + 36,
- gf_gld_mt_pending_node_t = gf_common_mt_end + 37,
- gf_gld_mt_brick_rsp_ctx_t = gf_common_mt_end + 38,
- gf_gld_mt_mop_brick_req_t = gf_common_mt_end + 39,
- gf_gld_mt_op_allack_ctx_t = gf_common_mt_end + 40,
- gf_gld_mt_linearr = gf_common_mt_end + 41,
- gf_gld_mt_linebuf = gf_common_mt_end + 42,
- gf_gld_mt_mount_pattern = gf_common_mt_end + 43,
- gf_gld_mt_mount_comp_container = gf_common_mt_end + 44,
- gf_gld_mt_mount_component = gf_common_mt_end + 45,
- gf_gld_mt_mount_spec = gf_common_mt_end + 46,
- gf_gld_mt_georep_meet_spec = gf_common_mt_end + 47,
- gf_gld_mt_nodesrv_t = gf_common_mt_end + 48,
- gf_gld_mt_charptr = gf_common_mt_end + 49,
- gf_gld_mt_hooks_stub_t = gf_common_mt_end + 50,
- gf_gld_mt_hooks_priv_t = gf_common_mt_end + 51,
- gf_gld_mt_mop_commit_req_t = gf_common_mt_end + 52,
- gf_gld_mt_int = gf_common_mt_end + 53,
- gf_gld_mt_snap_t = gf_common_mt_end + 54,
- gf_gld_mt_missed_snapinfo_t = gf_common_mt_end + 55,
- gf_gld_mt_snap_create_args_t = gf_common_mt_end + 56,
- gf_gld_mt_local_peers_t = gf_common_mt_end + 57,
- gf_gld_mt_glusterd_brick_proc_t = gf_common_mt_end + 58,
- gf_gld_mt_end = gf_common_mt_end + 59,
+ gf_gld_mt_glusterd_conf_t = gf_common_mt_end + 1,
+ gf_gld_mt_char,
+ gf_gld_mt_peerinfo_t,
+ gf_gld_mt_friend_sm_event_t,
+ gf_gld_mt_friend_req_ctx_t,
+ gf_gld_mt_friend_update_ctx_t,
+ gf_gld_mt_op_sm_event_t,
+ gf_gld_mt_op_lock_ctx_t,
+ gf_gld_mt_op_stage_ctx_t,
+ gf_gld_mt_op_commit_ctx_t,
+ gf_gld_mt_mop_stage_req_t,
+ gf_gld_mt_probe_ctx_t,
+ gf_gld_mt_glusterd_volinfo_t,
+ gf_gld_mt_volinfo_dict_data_t,
+ gf_gld_mt_glusterd_brickinfo_t,
+ gf_gld_mt_peer_hostname_t,
+ gf_gld_mt_defrag_info,
+ gf_gld_mt_peerctx_t,
+ gf_gld_mt_sm_tr_log_t,
+ gf_gld_mt_pending_node_t,
+ gf_gld_mt_brick_rsp_ctx_t,
+ gf_gld_mt_mop_brick_req_t,
+ gf_gld_mt_op_allack_ctx_t,
+ gf_gld_mt_linearr,
+ gf_gld_mt_linebuf,
+ gf_gld_mt_mount_pattern,
+ gf_gld_mt_mount_comp_container,
+ gf_gld_mt_mount_spec,
+ gf_gld_mt_georep_meet_spec,
+ gf_gld_mt_charptr,
+ gf_gld_mt_hooks_stub_t,
+ gf_gld_mt_hooks_priv_t,
+ gf_gld_mt_mop_commit_req_t,
+ gf_gld_mt_int,
+ gf_gld_mt_snap_t,
+ gf_gld_mt_missed_snapinfo_t,
+ gf_gld_mt_snap_create_args_t,
+ gf_gld_mt_glusterd_brick_proc_t,
+ gf_gld_mt_glusterd_svc_proc_t,
+ gf_gld_mt_end,
} gf_gld_mem_types_t;
#endif
diff --git a/xlators/mgmt/glusterd/src/glusterd-messages.h b/xlators/mgmt/glusterd/src/glusterd-messages.h
index 41fedf9e288..3a1e600fb03 100644
--- a/xlators/mgmt/glusterd/src/glusterd-messages.h
+++ b/xlators/mgmt/glusterd/src/glusterd-messages.h
@@ -11,7 +11,7 @@
#ifndef _GLUSTERD_MESSAGES_H_
#define _GLUSTERD_MESSAGES_H_
-#include "glfs-message-id.h"
+#include <glusterfs/glfs-message-id.h>
/* To add new message IDs, append new identifiers at the end of the list.
*
@@ -46,7 +46,7 @@ GLFS_MSGID(
GD_MSG_SNAP_STATUS_FAIL, GD_MSG_SNAP_INIT_FAIL, GD_MSG_VOLINFO_SET_FAIL,
GD_MSG_VOLINFO_GET_FAIL, GD_MSG_BRICK_CREATION_FAIL,
GD_MSG_BRICK_GET_INFO_FAIL, GD_MSG_BRICK_NEW_INFO_FAIL, GD_MSG_LVS_FAIL,
- GD_MSG_SETXATTR_FAIL, GD_MSG_UMOUNTING_SNAP_BRICK, GD_MSG_OP_UNSUPPORTED,
+ GD_MSG_SET_XATTR_FAIL, GD_MSG_UMOUNTING_SNAP_BRICK, GD_MSG_OP_UNSUPPORTED,
GD_MSG_SNAP_NOT_FOUND, GD_MSG_FS_LABEL_UPDATE_FAIL, GD_MSG_LVM_MOUNT_FAILED,
GD_MSG_DICT_SET_FAILED, GD_MSG_CANONICALIZE_FAIL, GD_MSG_DICT_GET_FAILED,
GD_MSG_SNAP_INFO_FAIL, GD_MSG_SNAP_VOL_CONFIG_FAIL,
@@ -78,7 +78,7 @@ GLFS_MSGID(
GD_MSG_COMMIT_OP_FAIL, GD_MSG_PEER_LIST_CREATE_FAIL, GD_MSG_BRICK_OP_FAIL,
GD_MSG_OPINFO_SET_FAIL, GD_MSG_OP_EVENT_UNLOCK_FAIL,
GD_MSG_MGMTV3_OP_RESP_FAIL, GD_MSG_PEER_NOT_FOUND, GD_MSG_REQ_DECODE_FAIL,
- GD_MSG_DICT_SERL_LENGTH_GET_FAIL, GD_MSG_ALREADY_STOPPED,
+ GD_MSG_DICT_ALLOC_AND_SERL_LENGTH_GET_FAIL, GD_MSG_ALREADY_STOPPED,
GD_MSG_PRE_VALD_RESP_FAIL, GD_MSG_SVC_GET_FAIL, GD_MSG_VOLFILE_NOT_FOUND,
GD_MSG_OP_EVENT_LOCK_FAIL, GD_MSG_NON_STRIPE_VOL, GD_MSG_SNAPD_OBJ_GET_FAIL,
GD_MSG_QUOTA_DISABLED, GD_MSG_CACHE_MINMAX_SIZE_INVALID,
@@ -116,7 +116,7 @@ GLFS_MSGID(
GD_MSG_PARSE_BRICKINFO_FAIL, GD_MSG_VERS_STORE_FAIL, GD_MSG_HEADER_ADD_FAIL,
GD_MSG_QUOTA_CONF_WRITE_FAIL, GD_MSG_QUOTA_CONF_CORRUPT, GD_MSG_FORK_FAIL,
GD_MSG_CKSUM_COMPUTE_FAIL, GD_MSG_VERS_CKSUM_STORE_FAIL,
- GD_MSG_GETXATTR_FAIL, GD_MSG_CONVERSION_FAILED, GD_MSG_VOL_NOT_DISTRIBUTE,
+ GD_MSG_GET_XATTR_FAIL, GD_MSG_CONVERSION_FAILED, GD_MSG_VOL_NOT_DISTRIBUTE,
GD_MSG_VOL_STOPPED, GD_MSG_OPCTX_GET_FAIL, GD_MSG_TASKID_GEN_FAIL,
GD_MSG_REBALANCE_ID_MISSING, GD_MSG_NO_REBALANCE_PFX_IN_VOLNAME,
GD_MSG_DEFRAG_STATUS_UPDATE_FAIL, GD_MSG_UUID_GEN_STORE_FAIL,
@@ -298,6 +298,154 @@ GLFS_MSGID(
GD_MSG_LOCALTIME_LOGGING_ENABLE, GD_MSG_LOCALTIME_LOGGING_DISABLE,
GD_MSG_PORTS_EXHAUSTED, GD_MSG_CHANGELOG_GET_FAIL,
GD_MSG_MANAGER_FUNCTION_FAILED,
- GD_MSG_DAEMON_LOG_LEVEL_VOL_OPT_VALIDATE_FAIL);
+ GD_MSG_DAEMON_LOG_LEVEL_VOL_OPT_VALIDATE_FAIL, GD_MSG_SHD_START_FAIL,
+ GD_MSG_SHD_OBJ_GET_FAIL, GD_MSG_SVC_ATTACH_FAIL, GD_MSG_ATTACH_INFO,
+ GD_MSG_DETACH_INFO, GD_MSG_SVC_DETACH_FAIL,
+ GD_MSG_RPC_TRANSPORT_GET_PEERNAME_FAIL, GD_MSG_CLUSTER_RC_ENABLE,
+ GD_MSG_NFS_GANESHA_DISABLED, GD_MSG_GANESHA_NOT_RUNNING, GD_MSG_SNAP_WARN,
+ GD_MSG_BRICK_SUBVOL_VERIFY_FAIL, GD_MSG_REMOVE_ARBITER_BRICK,
+ GD_MSG_BRICK_NOT_DECOM, GD_MSG_BRICK_STOPPED, GD_MSG_BRICK_DEAD,
+ GD_MSG_BRICK_HOST_NOT_FOUND, GD_MSG_BRICK_HOST_DOWN, GD_MSG_BRICK_DELETE,
+ GD_MSG_BRICK_NO_REMOVE_CMD, GD_MSG_MIGRATION_PROG, GD_MSG_MIGRATION_FAIL,
+ GD_MSG_COPY_FAIL, GD_MSG_REALPATH_GET_FAIL,
+ GD_MSG_ARBITER_BRICK_SET_INFO_FAIL, GD_MSG_STRCHR_FAIL, GD_MSG_SPLIT_FAIL,
+ GD_MSG_ALLOC_AND_COPY_UUID_FAIL, GD_MSG_VOL_SHD_NOT_COMP,
+ GD_MSG_BITROT_NOT_ENABLED, GD_MSG_CREATE_BRICK_DIR_FAILED,
+ GD_MSG_CREATE_GLUSTER_DIR_FAILED, GD_MSG_BRICK_CREATE_MNTPNT,
+ GD_MSG_BRICK_CREATE_ROOT, GD_MSG_SET_XATTR_BRICK_FAIL,
+ GD_MSG_REMOVE_XATTR_FAIL, GD_MSG_XLATOR_NOT_DEFINED,
+ GD_MSG_BRICK_NOT_RUNNING, GD_MSG_INCORRECT_BRICK, GD_MSG_UUID_GET_FAIL,
+ GD_MSG_INVALID_ARGUMENT, GD_MSG_FRAME_CREATE_FAIL,
+ GD_MSG_SNAPSHOT_NOT_THIN_PROVISIONED, GD_MSG_VOL_STOP_ARGS_GET_FAILED,
+ GD_MSG_LSTAT_FAIL, GD_MSG_VOLUME_NOT_IMPORTED,
+ GD_MSG_ADD_BRICK_MNT_INFO_FAIL, GD_MSG_GET_MNT_ENTRY_INFO_FAIL,
+ GD_MSG_QUORUM_CLUSTER_COUNT_GET_FAIL, GD_MSG_POST_COMMIT_OP_FAIL,
+ GD_MSG_POST_COMMIT_FROM_UUID_REJCT, GD_MSG_POST_COMMIT_REQ_SEND_FAIL);
+
+#define GD_MSG_INVALID_ENTRY_STR "Invalid data entry"
+#define GD_MSG_INVALID_ARGUMENT_STR \
+ "Invalid arguments have been given to function"
+#define GD_MSG_GARBAGE_ARGS_STR "Garbage args received"
+#define GD_MSG_BRICK_SUBVOL_VERIFY_FAIL_STR "Brick's subvol verification fail"
+#define GD_MSG_REMOVE_ARBITER_BRICK_STR "Failed to remove arbiter bricks"
+#define GD_MSG_DICT_GET_FAILED_STR "Dict get failed"
+#define GD_MSG_DICT_SET_FAILED_STR "Dict set failed"
+#define GD_MSG_BRICK_NOT_FOUND_STR "Brick not found in volume"
+#define GD_MSG_BRICK_NOT_DECOM_STR "Brick is not decommissoned"
+#define GD_MSG_BRICK_STOPPED_STR "Found stopped brick"
+#define GD_MSG_BRICK_DEAD_STR "Found dead brick"
+#define GD_MSG_BRICK_HOST_NOT_FOUND_STR \
+ "Host node of the brick is not a part of cluster"
+#define GD_MSG_BRICK_HOST_DOWN_STR "Host node of the brick is down"
+#define GD_MSG_BRICK_DELETE_STR \
+ "Deleting all the bricks of the volume is not allowed"
+#define GD_MSG_BRICK_NO_REMOVE_CMD_STR "No remove-brick command issued"
+#define GD_MSG_INCORRECT_BRICK_STR "Incorrect brick for volume"
+#define GD_MSG_MIGRATION_PROG_STR "Migration is in progress"
+#define GD_MSG_MIGRATION_FAIL_STR "Migration has failed"
+#define GD_MSG_XLATOR_NOT_DEFINED_STR "Xlator not defined"
+#define GD_MSG_DICT_CREATE_FAIL_STR "Failed to create dictionary"
+#define GD_MSG_COPY_FAIL_STR "Failed to copy"
+#define GD_MSG_UUID_GET_FAIL_STR "Failed to get the uuid of local glusterd"
+#define GD_MSG_GEO_REP_START_FAILED_STR "Georep start failed for volume"
+#define GD_MSG_REALPATH_GET_FAIL_STR "Failed to get realpath"
+#define GD_MSG_FILE_NOT_FOUND_STR "File not found in directory"
+#define GD_MSG_SRC_FILE_ERROR_STR "Error in source file"
+#define GD_MSG_DICT_UNSERIALIZE_FAIL_STR "Failed to unserialize dict"
+#define GD_MSG_VOL_ID_SET_FAIL_STR "Failed to set volume id"
+#define GD_MSG_ARBITER_BRICK_SET_INFO_FAIL_STR \
+ "Failed to add arbiter info to brick"
+#define GD_MSG_NO_MEMORY_STR "Out of memory"
+#define GD_MSG_GLUSTERD_UMOUNT_FAIL_STR "Failed to unmount path"
+#define GD_MSG_PEER_ADD_FAIL_STR "Failed to add new peer"
+#define GD_MSG_BRICK_GET_INFO_FAIL_STR "Failed to get brick info"
+#define GD_MSG_STRCHR_FAIL_STR "Failed to get the character"
+#define GD_MSG_SPLIT_FAIL_STR "Failed to split"
+#define GD_MSG_VOLINFO_GET_FAIL_STR "Failed to get volinfo"
+#define GD_MSG_PEER_NOT_FOUND_STR "Failed to find peer info"
+#define GD_MSG_DICT_COPY_FAIL_STR "Failed to copy values from dictionary"
+#define GD_MSG_ALLOC_AND_COPY_UUID_FAIL_STR \
+ "Failed to allocate memory or copy uuid"
+#define GD_MSG_VOL_NOT_FOUND_STR "Volume not found"
+#define GD_MSG_PEER_DISCONNECTED_STR "Peer is disconnected"
+#define GD_MSG_QUOTA_GET_STAT_FAIL_STR "Failed to get quota status"
+#define GD_MSG_SNAP_STATUS_FAIL_STR "Failed to get status of snapd"
+#define GD_MSG_VALIDATE_FAILED_STR "Failed to validate volume"
+#define GD_MSG_VOL_NOT_STARTED_STR "Volume is not started"
+#define GD_MSG_VOL_SHD_NOT_COMP_STR "Volume is not Self-heal compatible"
+#define GD_MSG_SELF_HEALD_DISABLED_STR "Self-heal daemon is disabled"
+#define GD_MSG_NFS_GANESHA_DISABLED_STR "NFS server is disabled"
+#define GD_MSG_QUOTA_DISABLED_STR "Quota is disabled"
+#define GD_MSG_BITROT_NOT_RUNNING_STR "Bitrot is not enabled"
+#define GD_MSG_BITROT_NOT_ENABLED_STR "Volume does not have bitrot enabled"
+#define GD_MSG_SNAPD_NOT_RUNNING_STR "Snapd is not enabled"
+#define GD_MSG_STRDUP_FAILED_STR "Strdup operation failed"
+#define GD_MSG_QUORUM_CLUSTER_COUNT_GET_FAIL_STR \
+ "Failed to get quorum cluster counts"
+#define GD_MSG_GLUSTER_SERVICE_START_FAIL_STR "Failed to start glusterd service"
+#define GD_MSG_PEER_ADDRESS_GET_FAIL_STR "Failed to get the address of peer"
+#define GD_MSG_INVALID_SLAVE_STR "Volume is not a slave volume"
+#define GD_MSG_BRICK_NOT_RUNNING_STR "One or more bricks are not running"
+#define GD_MSG_BRK_MNTPATH_GET_FAIL_STR "Failed to get brick mount device"
+#define GD_MSG_SNAPSHOT_NOT_THIN_PROVISIONED_STR \
+ "Snapshot is supported only for thin provisioned LV."
+#define GD_MSG_SNAP_DEVICE_NAME_GET_FAIL_STR \
+ "Failed to copy snapshot device name"
+#define GD_MSG_SNAP_NOT_FOUND_STR "Snapshot does not exist"
+#define GD_MSG_CREATE_BRICK_DIR_FAILED_STR "Failed to create brick directory"
+#define GD_MSG_LSTAT_FAIL_STR "Lstat operation failed"
+#define GD_MSG_DIR_OP_FAILED_STR \
+ "The provided path is already present. It is not a directory"
+#define GD_MSG_BRICK_CREATION_FAIL_STR \
+ "Brick isn't allowed to be created inside glusterd's working directory."
+#define GD_MSG_BRICK_CREATE_ROOT_STR \
+ "The brick is being created in the root partition. It is recommended " \
+ "that you don't use the system's root partition for storage backend."
+#define GD_MSG_BRICK_CREATE_MNTPNT_STR \
+ "The brick is a mount point. Please create a sub-directory under the " \
+ "mount point and use that as the brick directory."
+#define GD_MSG_CREATE_GLUSTER_DIR_FAILED_STR \
+ "Failed to create glusterfs directory"
+#define GD_MSG_VOLINFO_IMPORT_FAIL_STR "Volume is not yet imported"
+#define GD_MSG_BRICK_SET_INFO_FAIL_STR \
+ "Failed to add brick mount details to dict"
+#define GD_MSG_SET_XATTR_BRICK_FAIL_STR \
+ "Glusterfs is not supported on brick. Setting extended attribute failed"
+#define GD_MSG_SET_XATTR_FAIL_STR "Failed to set extended attribute"
+#define GD_MSG_REMOVE_XATTR_FAIL_STR "Failed to remove extended attribute"
+#define GD_MSG_XLATOR_SET_OPT_FAIL_STR "Failed to set xlator type"
+#define GD_MSG_XLATOR_LINK_FAIL_STR \
+ "Failed to do the link of xlator with children"
+#define GD_MSG_READ_ERROR_STR "Failed to read directory"
+#define GD_MSG_INCOMPATIBLE_VALUE_STR "Incompatible transport type"
+#define GD_MSG_VOL_STOP_ARGS_GET_FAILED_STR "Failed to get volume stop args"
+#define GD_MSG_FRAME_CREATE_FAIL_STR "Failed to create frame"
+#define GD_MSG_VOLUME_NOT_IMPORTED_STR "Volume has not been imported"
+#define GD_MSG_ADD_BRICK_MNT_INFO_FAIL_STR \
+ "Failed to add brick mount details to dict"
+#define GD_MSG_GET_MNT_ENTRY_INFO_FAIL_STR "Failed to get mount entry details"
+#define GD_MSG_BRICKPATH_ROOT_GET_FAIL_STR "failed to get brick root details"
+#define GD_MSG_VOL_INFO_REQ_RECVD_STR "Received get volume info req"
+#define GD_MSG_NO_FLAG_SET_STR "No flags set"
+#define GD_MSG_CREATE_DIR_FAILED_STR "Failed to create directory"
+#define GD_MSG_POST_HOOK_STUB_INIT_FAIL_STR \
+ "Failed to initialize post hooks stub"
+#define GD_MSG_FILE_OP_FAILED_STR "File operation failed"
+#define GD_MSG_INODE_SIZE_GET_FAIL_STR "Failed to get inode size"
+#define GD_MSG_CMD_EXEC_FAIL_STR "Command execution failed"
+#define GD_MSG_XLATOR_CREATE_FAIL_STR "Failed to create xlator"
+#define GD_MSG_CLRCLK_VOL_REQ_RCVD_STR "Received clear-locks request for volume"
+#define GD_MSG_BRK_PORT_NUM_GET_FAIL_STR \
+ "Couldn't get port number of local bricks"
+#define GD_MSG_CLRLOCKS_MOUNTDIR_CREATE_FAIL_STR \
+ "Creating mount directory for clear-locks failed"
+#define GD_MSG_CLRLOCKS_CLNT_MOUNT_FAIL_STR \
+ "Failed to mount clear-locks maintenance client"
+#define GD_MSG_CLRLOCKS_CLNT_UMOUNT_FAIL_STR \
+ "Failed to unmount clear-locks mount point"
+#define GD_MSG_CLRCLK_SND_CMD_FAIL_STR "Failed to send command for clear-locks"
+#define GD_MSG_DICT_ALLOC_AND_SERL_LENGTH_GET_FAIL_STR \
+ "Failed to allocate memory or get serialized length of dict"
+#define GD_MSG_GET_XATTR_FAIL_STR "Failed to get extended attribute"
#endif /* !_GLUSTERD_MESSAGES_H_ */
diff --git a/xlators/mgmt/glusterd/src/glusterd-mgmt-handler.c b/xlators/mgmt/glusterd/src/glusterd-mgmt-handler.c
index c8b080cc0ca..1069688a89d 100644
--- a/xlators/mgmt/glusterd/src/glusterd-mgmt-handler.c
+++ b/xlators/mgmt/glusterd/src/glusterd-mgmt-handler.c
@@ -165,6 +165,7 @@ glusterd_handle_mgmt_v3_lock_fn(rpcsvc_request_t *req)
ctx = GF_CALLOC(1, sizeof(*ctx), gf_gld_mt_op_lock_ctx_t);
if (!ctx) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_NO_MEMORY, NULL);
ret = -1;
goto out;
}
@@ -174,6 +175,7 @@ glusterd_handle_mgmt_v3_lock_fn(rpcsvc_request_t *req)
ctx->dict = dict_new();
if (!ctx->dict) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL);
ret = -1;
goto out;
}
@@ -181,8 +183,8 @@ glusterd_handle_mgmt_v3_lock_fn(rpcsvc_request_t *req)
ret = dict_unserialize(lock_req.dict.dict_val, lock_req.dict.dict_len,
&ctx->dict);
if (ret) {
- gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_DICT_UNSERIALIZE_FAIL,
- "failed to unserialize the dictionary");
+ gf_smsg(this->name, GF_LOG_WARNING, 0, GD_MSG_DICT_UNSERIALIZE_FAIL,
+ NULL);
goto out;
}
@@ -264,8 +266,8 @@ glusterd_mgmt_v3_pre_validate_send_resp(rpcsvc_request_t *req, int32_t op,
ret = dict_allocate_and_serialize(rsp_dict, &rsp.dict.dict_val,
&rsp.dict.dict_len);
if (ret < 0) {
- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SERL_LENGTH_GET_FAIL,
- "failed to get serialized length of dict");
+ gf_smsg(this->name, GF_LOG_ERROR, 0,
+ GD_MSG_DICT_ALLOC_AND_SERL_LENGTH_GET_FAIL, NULL);
goto out;
}
@@ -315,20 +317,21 @@ glusterd_handle_pre_validate_fn(rpcsvc_request_t *req)
}
dict = dict_new();
- if (!dict)
+ if (!dict) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_CREATE_FAIL, NULL);
goto out;
+ }
ret = dict_unserialize(op_req.dict.dict_val, op_req.dict.dict_len, &dict);
if (ret) {
- gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_DICT_UNSERIALIZE_FAIL,
- "failed to unserialize the dictionary");
+ gf_smsg(this->name, GF_LOG_WARNING, 0, GD_MSG_DICT_UNSERIALIZE_FAIL,
+ NULL);
goto out;
}
rsp_dict = dict_new();
if (!rsp_dict) {
- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_CREATE_FAIL,
- "Failed to get new dictionary");
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_CREATE_FAIL, NULL);
return -1;
}
@@ -391,8 +394,8 @@ glusterd_mgmt_v3_brick_op_send_resp(rpcsvc_request_t *req, int32_t op,
ret = dict_allocate_and_serialize(rsp_dict, &rsp.dict.dict_val,
&rsp.dict.dict_len);
if (ret < 0) {
- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SERL_LENGTH_GET_FAIL,
- "failed to get serialized length of dict");
+ gf_smsg(this->name, GF_LOG_ERROR, 0,
+ GD_MSG_DICT_ALLOC_AND_SERL_LENGTH_GET_FAIL, NULL);
goto out;
}
@@ -441,20 +444,21 @@ glusterd_handle_brick_op_fn(rpcsvc_request_t *req)
}
dict = dict_new();
- if (!dict)
+ if (!dict) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_CREATE_FAIL, NULL);
goto out;
+ }
ret = dict_unserialize(op_req.dict.dict_val, op_req.dict.dict_len, &dict);
if (ret) {
- gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_DICT_UNSERIALIZE_FAIL,
- "failed to unserialize the dictionary");
+ gf_smsg(this->name, GF_LOG_WARNING, 0, GD_MSG_DICT_UNSERIALIZE_FAIL,
+ NULL);
goto out;
}
rsp_dict = dict_new();
if (!rsp_dict) {
- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_CREATE_FAIL,
- "Failed to get new dictionary");
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_CREATE_FAIL, NULL);
return -1;
}
@@ -518,8 +522,8 @@ glusterd_mgmt_v3_commit_send_resp(rpcsvc_request_t *req, int32_t op,
ret = dict_allocate_and_serialize(rsp_dict, &rsp.dict.dict_val,
&rsp.dict.dict_len);
if (ret < 0) {
- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SERL_LENGTH_GET_FAIL,
- "failed to get serialized length of dict");
+ gf_smsg(this->name, GF_LOG_ERROR, 0,
+ GD_MSG_DICT_ALLOC_AND_SERL_LENGTH_GET_FAIL, NULL);
goto out;
}
@@ -569,20 +573,21 @@ glusterd_handle_commit_fn(rpcsvc_request_t *req)
}
dict = dict_new();
- if (!dict)
+ if (!dict) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_CREATE_FAIL, NULL);
goto out;
+ }
ret = dict_unserialize(op_req.dict.dict_val, op_req.dict.dict_len, &dict);
if (ret) {
- gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_DICT_UNSERIALIZE_FAIL,
- "failed to unserialize the dictionary");
+ gf_smsg(this->name, GF_LOG_WARNING, 0, GD_MSG_DICT_UNSERIALIZE_FAIL,
+ NULL);
goto out;
}
rsp_dict = dict_new();
if (!rsp_dict) {
- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_CREATE_FAIL,
- "Failed to get new dictionary");
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_CREATE_FAIL, NULL);
return -1;
}
@@ -621,6 +626,136 @@ out:
}
static int
+glusterd_mgmt_v3_post_commit_send_resp(rpcsvc_request_t *req, int32_t op,
+ int32_t status, char *op_errstr,
+ uint32_t op_errno, dict_t *rsp_dict)
+{
+ gd1_mgmt_v3_post_commit_rsp rsp = {
+ {0},
+ };
+ int ret = -1;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+ GF_ASSERT(req);
+
+ rsp.op_ret = status;
+ glusterd_get_uuid(&rsp.uuid);
+ rsp.op = op;
+ rsp.op_errno = op_errno;
+ if (op_errstr)
+ rsp.op_errstr = op_errstr;
+ else
+ rsp.op_errstr = "";
+
+ ret = dict_allocate_and_serialize(rsp_dict, &rsp.dict.dict_val,
+ &rsp.dict.dict_len);
+ if (ret < 0) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0,
+ GD_MSG_DICT_ALLOC_AND_SERL_LENGTH_GET_FAIL, NULL);
+ goto out;
+ }
+
+ ret = glusterd_submit_reply(req, &rsp, NULL, 0, NULL,
+ (xdrproc_t)xdr_gd1_mgmt_v3_post_commit_rsp);
+
+ GF_FREE(rsp.dict.dict_val);
+out:
+ gf_msg_debug(this->name, 0, "Responded to post commit, ret: %d", ret);
+ return ret;
+}
+
+static int
+glusterd_handle_post_commit_fn(rpcsvc_request_t *req)
+{
+ int32_t ret = -1;
+ gd1_mgmt_v3_post_commit_req op_req = {
+ {0},
+ };
+ xlator_t *this = NULL;
+ char *op_errstr = NULL;
+ dict_t *dict = NULL;
+ dict_t *rsp_dict = NULL;
+ uint32_t op_errno = 0;
+
+ this = THIS;
+ GF_ASSERT(this);
+ GF_ASSERT(req);
+
+ ret = xdr_to_generic(req->msg[0], &op_req,
+ (xdrproc_t)xdr_gd1_mgmt_v3_post_commit_req);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_REQ_DECODE_FAIL,
+ "Failed to decode post commit "
+ "request received from peer");
+ req->rpc_err = GARBAGE_ARGS;
+ goto out;
+ }
+
+ if (glusterd_peerinfo_find_by_uuid(op_req.uuid) == NULL) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_PEER_NOT_FOUND,
+ "%s doesn't "
+ "belong to the cluster. Ignoring request.",
+ uuid_utoa(op_req.uuid));
+ ret = -1;
+ goto out;
+ }
+
+ dict = dict_new();
+ if (!dict) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_CREATE_FAIL, NULL);
+ goto out;
+ }
+
+ ret = dict_unserialize(op_req.dict.dict_val, op_req.dict.dict_len, &dict);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_WARNING, 0, GD_MSG_DICT_UNSERIALIZE_FAIL,
+ NULL);
+ goto out;
+ }
+
+ rsp_dict = dict_new();
+ if (!rsp_dict) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_CREATE_FAIL, NULL);
+ return -1;
+ }
+
+ ret = gd_mgmt_v3_post_commit_fn(op_req.op, dict, &op_errstr, &op_errno,
+ rsp_dict);
+
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_POST_COMMIT_OP_FAIL,
+ "post commit failed on operation %s", gd_op_list[op_req.op]);
+ }
+
+ ret = glusterd_mgmt_v3_post_commit_send_resp(req, op_req.op, ret, op_errstr,
+ op_errno, rsp_dict);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_MGMTV3_OP_RESP_FAIL,
+ "Failed to send post commit "
+ "response for operation %s",
+ gd_op_list[op_req.op]);
+ goto out;
+ }
+
+out:
+ if (op_errstr && (strcmp(op_errstr, "")))
+ GF_FREE(op_errstr);
+
+ free(op_req.dict.dict_val);
+
+ if (dict)
+ dict_unref(dict);
+
+ if (rsp_dict)
+ dict_unref(rsp_dict);
+
+ /* Return 0 from handler to avoid double deletion of req obj */
+ return 0;
+}
+
+static int
glusterd_mgmt_v3_post_validate_send_resp(rpcsvc_request_t *req, int32_t op,
int32_t status, char *op_errstr,
dict_t *rsp_dict)
@@ -646,8 +781,8 @@ glusterd_mgmt_v3_post_validate_send_resp(rpcsvc_request_t *req, int32_t op,
ret = dict_allocate_and_serialize(rsp_dict, &rsp.dict.dict_val,
&rsp.dict.dict_len);
if (ret < 0) {
- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SERL_LENGTH_GET_FAIL,
- "failed to get serialized length of dict");
+ gf_smsg(this->name, GF_LOG_ERROR, 0,
+ GD_MSG_DICT_ALLOC_AND_SERL_LENGTH_GET_FAIL, NULL);
goto out;
}
@@ -696,20 +831,21 @@ glusterd_handle_post_validate_fn(rpcsvc_request_t *req)
}
dict = dict_new();
- if (!dict)
+ if (!dict) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_CREATE_FAIL, NULL);
goto out;
+ }
ret = dict_unserialize(op_req.dict.dict_val, op_req.dict.dict_len, &dict);
if (ret) {
- gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_DICT_UNSERIALIZE_FAIL,
- "failed to unserialize the dictionary");
+ gf_smsg(this->name, GF_LOG_WARNING, 0, GD_MSG_DICT_UNSERIALIZE_FAIL,
+ NULL);
goto out;
}
rsp_dict = dict_new();
if (!rsp_dict) {
- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_CREATE_FAIL,
- "Failed to get new dictionary");
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_CREATE_FAIL, NULL);
return -1;
}
@@ -867,6 +1003,7 @@ glusterd_handle_mgmt_v3_unlock_fn(rpcsvc_request_t *req)
ctx = GF_CALLOC(1, sizeof(*ctx), gf_gld_mt_op_lock_ctx_t);
if (!ctx) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_NO_MEMORY, NULL);
ret = -1;
goto out;
}
@@ -876,6 +1013,7 @@ glusterd_handle_mgmt_v3_unlock_fn(rpcsvc_request_t *req)
ctx->dict = dict_new();
if (!ctx->dict) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_CREATE_FAIL, NULL);
ret = -1;
goto out;
}
@@ -883,8 +1021,8 @@ glusterd_handle_mgmt_v3_unlock_fn(rpcsvc_request_t *req)
ret = dict_unserialize(lock_req.dict.dict_val, lock_req.dict.dict_len,
&ctx->dict);
if (ret) {
- gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_DICT_UNSERIALIZE_FAIL,
- "failed to unserialize the dictionary");
+ gf_smsg(this->name, GF_LOG_WARNING, 0, GD_MSG_DICT_UNSERIALIZE_FAIL,
+ NULL);
goto out;
}
@@ -955,6 +1093,12 @@ glusterd_handle_commit(rpcsvc_request_t *req)
}
static int
+glusterd_handle_post_commit(rpcsvc_request_t *req)
+{
+ return glusterd_big_locked_handler(req, glusterd_handle_post_commit_fn);
+}
+
+static int
glusterd_handle_post_validate(rpcsvc_request_t *req)
{
return glusterd_big_locked_handler(req, glusterd_handle_post_validate_fn);
@@ -966,25 +1110,28 @@ glusterd_handle_mgmt_v3_unlock(rpcsvc_request_t *req)
return glusterd_big_locked_handler(req, glusterd_handle_mgmt_v3_unlock_fn);
}
-rpcsvc_actor_t gd_svc_mgmt_v3_actors[GLUSTERD_MGMT_V3_MAXVALUE] = {
- [GLUSTERD_MGMT_V3_NULL] = {"NULL", GLUSTERD_MGMT_V3_NULL,
- glusterd_mgmt_v3_null, NULL, 0, DRC_NA},
- [GLUSTERD_MGMT_V3_LOCK] = {"MGMT_V3_LOCK", GLUSTERD_MGMT_V3_LOCK,
- glusterd_handle_mgmt_v3_lock, NULL, 0, DRC_NA},
- [GLUSTERD_MGMT_V3_PRE_VALIDATE] = {"PRE_VAL", GLUSTERD_MGMT_V3_PRE_VALIDATE,
- glusterd_handle_pre_validate, NULL, 0,
- DRC_NA},
- [GLUSTERD_MGMT_V3_BRICK_OP] = {"BRCK_OP", GLUSTERD_MGMT_V3_BRICK_OP,
- glusterd_handle_brick_op, NULL, 0, DRC_NA},
- [GLUSTERD_MGMT_V3_COMMIT] = {"COMMIT", GLUSTERD_MGMT_V3_COMMIT,
- glusterd_handle_commit, NULL, 0, DRC_NA},
+static rpcsvc_actor_t gd_svc_mgmt_v3_actors[GLUSTERD_MGMT_V3_MAXVALUE] = {
+ [GLUSTERD_MGMT_V3_NULL] = {"NULL", glusterd_mgmt_v3_null, NULL,
+ GLUSTERD_MGMT_V3_NULL, DRC_NA, 0},
+ [GLUSTERD_MGMT_V3_LOCK] = {"MGMT_V3_LOCK", glusterd_handle_mgmt_v3_lock,
+ NULL, GLUSTERD_MGMT_V3_LOCK, DRC_NA, 0},
+ [GLUSTERD_MGMT_V3_PRE_VALIDATE] = {"PRE_VAL", glusterd_handle_pre_validate,
+ NULL, GLUSTERD_MGMT_V3_PRE_VALIDATE,
+ DRC_NA, 0},
+ [GLUSTERD_MGMT_V3_BRICK_OP] = {"BRCK_OP", glusterd_handle_brick_op, NULL,
+ GLUSTERD_MGMT_V3_BRICK_OP, DRC_NA, 0},
+ [GLUSTERD_MGMT_V3_COMMIT] = {"COMMIT", glusterd_handle_commit, NULL,
+ GLUSTERD_MGMT_V3_COMMIT, DRC_NA, 0},
+ [GLUSTERD_MGMT_V3_POST_COMMIT] = {"POST_COMMIT",
+ glusterd_handle_post_commit, NULL,
+ GLUSTERD_MGMT_V3_POST_COMMIT, DRC_NA, 0},
[GLUSTERD_MGMT_V3_POST_VALIDATE] = {"POST_VAL",
- GLUSTERD_MGMT_V3_POST_VALIDATE,
- glusterd_handle_post_validate, NULL, 0,
- DRC_NA},
- [GLUSTERD_MGMT_V3_UNLOCK] = {"MGMT_V3_UNLOCK", GLUSTERD_MGMT_V3_UNLOCK,
- glusterd_handle_mgmt_v3_unlock, NULL, 0,
- DRC_NA},
+ glusterd_handle_post_validate, NULL,
+ GLUSTERD_MGMT_V3_POST_VALIDATE, DRC_NA,
+ 0},
+ [GLUSTERD_MGMT_V3_UNLOCK] = {"MGMT_V3_UNLOCK",
+ glusterd_handle_mgmt_v3_unlock, NULL,
+ GLUSTERD_MGMT_V3_UNLOCK, DRC_NA, 0},
};
struct rpcsvc_program gd_svc_mgmt_v3_prog = {
diff --git a/xlators/mgmt/glusterd/src/glusterd-mgmt.c b/xlators/mgmt/glusterd/src/glusterd-mgmt.c
index c7e9193d8aa..bca7221062b 100644
--- a/xlators/mgmt/glusterd/src/glusterd-mgmt.c
+++ b/xlators/mgmt/glusterd/src/glusterd-mgmt.c
@@ -19,6 +19,7 @@
#include "glusterd-locks.h"
#include "glusterd-mgmt.h"
#include "glusterd-op-sm.h"
+#include "glusterd-server-quorum.h"
#include "glusterd-volgen.h"
#include "glusterd-store.h"
#include "glusterd-snapshot-utils.h"
@@ -51,14 +52,14 @@ gd_mgmt_v3_collate_errors(struct syncargs *args, int op_ret, int op_errno,
args->op_ret = op_ret;
args->op_errno = op_errno;
- rcu_read_lock();
+ RCU_READ_LOCK;
peerinfo = glusterd_peerinfo_find(peerid, NULL);
if (peerinfo)
peer_str = gf_strdup(peerinfo->hostname);
else
peer_str = gf_strdup(uuid_utoa(uuid));
- rcu_read_unlock();
+ RCU_READ_UNLOCK;
is_operrstr_blk = (op_errstr && strcmp(op_errstr, ""));
err_string = (is_operrstr_blk) ? op_errstr : err_str;
@@ -85,6 +86,11 @@ gd_mgmt_v3_collate_errors(struct syncargs *args, int op_ret, int op_errno,
peer_str, err_string);
break;
}
+ case GLUSTERD_MGMT_V3_POST_COMMIT: {
+ snprintf(op_err, sizeof(op_err), "Post commit failed on %s. %s",
+ peer_str, err_string);
+ break;
+ }
case GLUSTERD_MGMT_V3_POST_VALIDATE: {
snprintf(op_err, sizeof(op_err),
"Post Validation failed on %s. %s", peer_str,
@@ -159,7 +165,6 @@ gd_mgmt_v3_pre_validate_fn(glusterd_op_t op, dict_t *dict, char **op_errstr,
goto out;
}
break;
- case GD_OP_ADD_TIER_BRICK:
case GD_OP_ADD_BRICK:
ret = glusterd_op_stage_add_brick(dict, op_errstr, rsp_dict);
if (ret) {
@@ -187,15 +192,12 @@ gd_mgmt_v3_pre_validate_fn(glusterd_op_t op, dict_t *dict, char **op_errstr,
goto out;
}
break;
- case GD_OP_TIER_START_STOP:
- case GD_OP_TIER_STATUS:
- case GD_OP_DETACH_TIER_STATUS:
- case GD_OP_REMOVE_TIER_BRICK:
- ret = glusterd_op_stage_tier(dict, op_errstr, rsp_dict);
+ case GD_OP_REMOVE_BRICK:
+ ret = glusterd_op_stage_remove_brick(dict, op_errstr);
if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_COMMAND_NOT_FOUND,
- "tier "
- "prevalidation failed");
+ gf_msg(this->name, GF_LOG_WARNING, 0,
+ GD_MSG_PRE_VALIDATION_FAIL,
+ "Remove brick prevalidation failed.");
goto out;
}
break;
@@ -210,6 +212,25 @@ gd_mgmt_v3_pre_validate_fn(glusterd_op_t op, dict_t *dict, char **op_errstr,
}
break;
+ case GD_OP_PROFILE_VOLUME:
+ ret = glusterd_op_stage_stats_volume(dict, op_errstr);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0,
+ GD_MSG_PRE_VALIDATION_FAIL,
+ "prevalidation failed for profile operation.");
+ goto out;
+ }
+ break;
+ case GD_OP_REBALANCE:
+ case GD_OP_DEFRAG_BRICK_VOLUME:
+ ret = glusterd_mgmt_v3_op_stage_rebalance(dict, op_errstr);
+ if (ret) {
+ gf_log(this->name, GF_LOG_WARNING,
+ "Rebalance Prevalidate Failed");
+ goto out;
+ }
+ break;
+
case GD_OP_MAX_OPVERSION:
ret = 0;
break;
@@ -247,6 +268,19 @@ gd_mgmt_v3_brick_op_fn(glusterd_op_t op, dict_t *dict, char **op_errstr,
}
break;
}
+ case GD_OP_PROFILE_VOLUME:
+ case GD_OP_REBALANCE:
+ case GD_OP_DEFRAG_BRICK_VOLUME: {
+ ret = gd_brick_op_phase(op, rsp_dict, dict, op_errstr);
+ if (ret) {
+ gf_log(this->name, GF_LOG_WARNING,
+ "%s brickop "
+ "failed",
+ gd_op_list[op]);
+ goto out;
+ }
+ break;
+ }
default:
break;
}
@@ -263,7 +297,6 @@ gd_mgmt_v3_commit_fn(glusterd_op_t op, dict_t *dict, char **op_errstr,
{
int32_t ret = -1;
xlator_t *this = NULL;
- int32_t cmd = 0;
this = THIS;
GF_ASSERT(this);
@@ -319,6 +352,15 @@ gd_mgmt_v3_commit_fn(glusterd_op_t op, dict_t *dict, char **op_errstr,
}
break;
}
+ case GD_OP_REMOVE_BRICK: {
+ ret = glusterd_op_remove_brick(dict, op_errstr);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_COMMIT_OP_FAIL,
+ "Remove-brick commit failed.");
+ goto out;
+ }
+ break;
+ }
case GD_OP_RESET_BRICK: {
ret = glusterd_op_reset_brick(dict, rsp_dict);
if (ret) {
@@ -337,52 +379,67 @@ gd_mgmt_v3_commit_fn(glusterd_op_t op, dict_t *dict, char **op_errstr,
}
break;
}
- case GD_OP_TIER_START_STOP: {
- ret = glusterd_op_tier_start_stop(dict, op_errstr, rsp_dict);
+ case GD_OP_PROFILE_VOLUME: {
+ ret = glusterd_op_stats_volume(dict, op_errstr, rsp_dict);
if (ret) {
gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_COMMIT_OP_FAIL,
- "tier commit failed.");
+ "commit failed for volume profile operation.");
goto out;
}
break;
}
- case GD_OP_REMOVE_TIER_BRICK: {
- ret = glusterd_op_remove_tier_brick(dict, op_errstr, rsp_dict);
+ case GD_OP_REBALANCE:
+ case GD_OP_DEFRAG_BRICK_VOLUME: {
+ ret = glusterd_mgmt_v3_op_rebalance(dict, op_errstr, rsp_dict);
if (ret) {
gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_COMMIT_OP_FAIL,
- "tier detach commit failed.");
+ "Rebalance Commit Failed");
goto out;
}
- ret = dict_get_int32n(dict, "rebalance-command",
- SLEN("rebalance-command"), &cmd);
- if (ret) {
- gf_msg_debug(this->name, 0, "cmd not found");
- goto out;
- }
-
- if (cmd != GF_DEFRAG_CMD_DETACH_STOP)
- break;
+ break;
}
- case GD_OP_DETACH_TIER_STATUS:
- case GD_OP_TIER_STATUS: {
- ret = glusterd_op_tier_status(dict, op_errstr, rsp_dict, op);
+
+ default:
+ break;
+ }
+
+ ret = 0;
+out:
+ gf_msg_debug(this->name, 0, "OP = %d. Returning %d", op, ret);
+ return ret;
+}
+
+int32_t
+gd_mgmt_v3_post_commit_fn(glusterd_op_t op, dict_t *dict, char **op_errstr,
+ uint32_t *op_errno, dict_t *rsp_dict)
+{
+ int32_t ret = -1;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+ GF_ASSERT(dict);
+ GF_ASSERT(op_errstr);
+ GF_VALIDATE_OR_GOTO(this->name, op_errno, out);
+ GF_ASSERT(rsp_dict);
+
+ switch (op) {
+ case GD_OP_ADD_BRICK:
+ ret = glusterd_post_commit_add_brick(dict, op_errstr);
if (ret) {
- gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_COMMIT_OP_FAIL,
- "tier status commit failed");
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_POST_COMMIT_OP_FAIL,
+ "Add-brick post commit failed.");
goto out;
}
break;
- }
- case GD_OP_ADD_TIER_BRICK: {
- ret = glusterd_op_add_tier_brick(dict, op_errstr);
+ case GD_OP_REPLACE_BRICK:
+ ret = glusterd_post_commit_replace_brick(dict, op_errstr);
if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_COMMIT_OP_FAIL,
- "tier add-brick commit failed.");
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_POST_COMMIT_OP_FAIL,
+ "Replace-brick post commit failed.");
goto out;
}
break;
- }
-
default:
break;
}
@@ -401,7 +458,6 @@ gd_mgmt_v3_post_validate_fn(glusterd_op_t op, int32_t op_ret, dict_t *dict,
xlator_t *this = NULL;
char *volname = NULL;
glusterd_volinfo_t *volinfo = NULL;
- glusterd_svc_t *svc = NULL;
this = THIS;
GF_ASSERT(this);
@@ -466,12 +522,6 @@ gd_mgmt_v3_post_validate_fn(glusterd_op_t op, int32_t op_ret, dict_t *dict,
goto out;
}
- if (volinfo->type == GF_CLUSTER_TYPE_TIER) {
- svc = &(volinfo->tierd.svc);
- ret = svc->manager(svc, volinfo, PROC_START_NO_WAIT);
- if (ret)
- goto out;
- }
break;
}
case GD_OP_STOP_VOLUME: {
@@ -492,49 +542,6 @@ gd_mgmt_v3_post_validate_fn(glusterd_op_t op, int32_t op_ret, dict_t *dict,
}
break;
}
- case GD_OP_ADD_TIER_BRICK: {
- ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname);
- if (ret) {
- gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
- "Unable to get"
- " volume name");
- goto out;
- }
-
- ret = glusterd_volinfo_find(volname, &volinfo);
- if (ret) {
- gf_msg("glusterd", GF_LOG_ERROR, EINVAL, GD_MSG_VOL_NOT_FOUND,
- "Unable to "
- "allocate memory");
- goto out;
- }
- ret = glusterd_create_volfiles_and_notify_services(volinfo);
- if (ret)
- goto out;
- ret = glusterd_store_volinfo(volinfo,
- GLUSTERD_VOLINFO_VER_AC_INCREMENT);
- if (ret)
- goto out;
- ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname);
- if (ret) {
- gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
- "Unable to get"
- " volume name");
- goto out;
- }
-
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
- "dict set "
- "failed");
- goto out;
- }
- ret = -1;
- svc = &(volinfo->tierd.svc);
- ret = svc->manager(svc, volinfo, PROC_START_NO_WAIT);
- if (ret)
- goto out;
- }
default:
break;
@@ -640,15 +647,21 @@ gd_mgmt_v3_lock(glusterd_op_t op, dict_t *op_ctx, glusterd_peerinfo_t *peerinfo,
ret = dict_allocate_and_serialize(op_ctx, &req.dict.dict_val,
&req.dict.dict_len);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ GD_MSG_DICT_ALLOC_AND_SERL_LENGTH_GET_FAIL, NULL);
goto out;
+ }
gf_uuid_copy(req.uuid, my_uuid);
req.op = op;
GD_ALLOC_COPY_UUID(peerid, peerinfo->uuid, ret);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ GD_MSG_ALLOC_AND_COPY_UUID_FAIL, NULL);
goto out;
+ }
ret = gd_syncop_submit_request(peerinfo->rpc, &req, args, peerid,
&gd_mgmt_v3_prog, GLUSTERD_MGMT_V3_LOCK,
@@ -705,10 +718,13 @@ glusterd_mgmt_v3_initiate_lockdown(glusterd_op_t op, dict_t *dict,
/* Sending mgmt_v3 lock req to other nodes in the cluster */
gd_syncargs_init(&args, NULL);
- synctask_barrier_init((&args));
+ ret = synctask_barrier_init((&args));
+ if (ret)
+ goto out;
+
peer_cnt = 0;
- rcu_read_lock();
+ RCU_READ_LOCK;
cds_list_for_each_entry_rcu(peerinfo, &conf->peers, uuid_list)
{
/* Only send requests to peers who were available before the
@@ -726,7 +742,7 @@ glusterd_mgmt_v3_initiate_lockdown(glusterd_op_t op, dict_t *dict,
gd_mgmt_v3_lock(op, dict, peerinfo, &args, MY_UUID, peer_uuid);
peer_cnt++;
}
- rcu_read_unlock();
+ RCU_READ_UNLOCK;
if (0 == peer_cnt) {
ret = 0;
@@ -796,7 +812,6 @@ glusterd_pre_validate_aggr_rsp_dict(glusterd_op_t op, dict_t *aggr, dict_t *rsp)
break;
case GD_OP_START_VOLUME:
case GD_OP_ADD_BRICK:
- case GD_OP_ADD_TIER_BRICK:
ret = glusterd_aggr_brick_mount_dirs(aggr, rsp);
if (ret) {
gf_msg(this->name, GF_LOG_ERROR, 0,
@@ -815,10 +830,10 @@ glusterd_pre_validate_aggr_rsp_dict(glusterd_op_t op, dict_t *aggr, dict_t *rsp)
goto out;
}
case GD_OP_STOP_VOLUME:
- case GD_OP_TIER_STATUS:
- case GD_OP_DETACH_TIER_STATUS:
- case GD_OP_TIER_START_STOP:
- case GD_OP_REMOVE_TIER_BRICK:
+ case GD_OP_REMOVE_BRICK:
+ case GD_OP_PROFILE_VOLUME:
+ case GD_OP_DEFRAG_BRICK_VOLUME:
+ case GD_OP_REBALANCE:
break;
case GD_OP_MAX_OPVERSION:
break;
@@ -954,15 +969,21 @@ gd_mgmt_v3_pre_validate_req(glusterd_op_t op, dict_t *op_ctx,
ret = dict_allocate_and_serialize(op_ctx, &req.dict.dict_val,
&req.dict.dict_len);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ GD_MSG_DICT_ALLOC_AND_SERL_LENGTH_GET_FAIL, NULL);
goto out;
+ }
gf_uuid_copy(req.uuid, my_uuid);
req.op = op;
GD_ALLOC_COPY_UUID(peerid, peerinfo->uuid, ret);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ GD_MSG_ALLOC_AND_COPY_UUID_FAIL, NULL);
goto out;
+ }
ret = gd_syncop_submit_request(
peerinfo->rpc, &req, args, peerid, &gd_mgmt_v3_prog,
@@ -1004,6 +1025,16 @@ glusterd_mgmt_v3_pre_validate(glusterd_op_t op, dict_t *req_dict,
goto out;
}
+ if (op == GD_OP_PROFILE_VOLUME || op == GD_OP_STOP_VOLUME ||
+ op == GD_OP_REBALANCE || op == GD_OP_REMOVE_BRICK) {
+ ret = glusterd_validate_quorum(this, op, req_dict, op_errstr);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SERVER_QUORUM_NOT_MET,
+ "Server quorum not met. Rejecting operation.");
+ goto out;
+ }
+ }
+
/* Pre Validation on local node */
ret = gd_mgmt_v3_pre_validate_fn(op, req_dict, op_errstr, rsp_dict,
op_errno);
@@ -1043,10 +1074,13 @@ glusterd_mgmt_v3_pre_validate(glusterd_op_t op, dict_t *req_dict,
/* Sending Pre Validation req to other nodes in the cluster */
gd_syncargs_init(&args, req_dict);
- synctask_barrier_init((&args));
+ ret = synctask_barrier_init((&args));
+ if (ret)
+ goto out;
+
peer_cnt = 0;
- rcu_read_lock();
+ RCU_READ_LOCK;
cds_list_for_each_entry_rcu(peerinfo, &conf->peers, uuid_list)
{
/* Only send requests to peers who were available before the
@@ -1065,7 +1099,7 @@ glusterd_mgmt_v3_pre_validate(glusterd_op_t op, dict_t *req_dict,
peer_uuid);
peer_cnt++;
}
- rcu_read_unlock();
+ RCU_READ_UNLOCK;
if (0 == peer_cnt) {
ret = 0;
@@ -1120,9 +1154,11 @@ glusterd_mgmt_v3_build_payload(dict_t **req, char **op_errstr, dict_t *dict,
case GD_OP_START_VOLUME:
case GD_OP_STOP_VOLUME:
case GD_OP_ADD_BRICK:
+ case GD_OP_REMOVE_BRICK:
+ case GD_OP_DEFRAG_BRICK_VOLUME:
case GD_OP_REPLACE_BRICK:
case GD_OP_RESET_BRICK:
- case GD_OP_ADD_TIER_BRICK: {
+ case GD_OP_PROFILE_VOLUME: {
ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname);
if (ret) {
gf_msg(this->name, GF_LOG_CRITICAL, errno,
@@ -1139,12 +1175,29 @@ glusterd_mgmt_v3_build_payload(dict_t **req, char **op_errstr, dict_t *dict,
}
dict_copy(dict, req_dict);
} break;
- case GD_OP_TIER_START_STOP:
- case GD_OP_REMOVE_TIER_BRICK:
- case GD_OP_DETACH_TIER_STATUS:
- case GD_OP_TIER_STATUS:
+
+ case GD_OP_REBALANCE: {
+ if (gd_set_commit_hash(dict) != 0) {
+ ret = -1;
+ goto out;
+ }
+ ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_CRITICAL, errno,
+ GD_MSG_DICT_GET_FAILED,
+ "volname is not present in "
+ "operation ctx");
+ goto out;
+ }
+
+ if (strcasecmp(volname, "all")) {
+ ret = glusterd_dict_set_volid(dict, volname, op_errstr);
+ if (ret)
+ goto out;
+ }
dict_copy(dict, req_dict);
- break;
+ } break;
+
default:
break;
}
@@ -1167,6 +1220,7 @@ gd_mgmt_v3_brick_op_cbk_fn(struct rpc_req *req, struct iovec *iov, int count,
call_frame_t *frame = NULL;
int32_t op_ret = -1;
int32_t op_errno = -1;
+ dict_t *rsp_dict = NULL;
xlator_t *this = NULL;
uuid_t *peerid = NULL;
@@ -1196,20 +1250,53 @@ gd_mgmt_v3_brick_op_cbk_fn(struct rpc_req *req, struct iovec *iov, int count,
if (ret < 0)
goto out;
+ if (rsp.dict.dict_len) {
+ /* Unserialize the dictionary */
+ rsp_dict = dict_new();
+
+ ret = dict_unserialize(rsp.dict.dict_val, rsp.dict.dict_len, &rsp_dict);
+ if (ret < 0) {
+ goto out;
+ } else {
+ rsp_dict->extra_stdfree = rsp.dict.dict_val;
+ }
+ }
+
gf_uuid_copy(args->uuid, rsp.uuid);
+ pthread_mutex_lock(&args->lock_dict);
+ {
+ if (rsp.op == GD_OP_DEFRAG_BRICK_VOLUME ||
+ rsp.op == GD_OP_PROFILE_VOLUME)
+ ret = glusterd_syncop_aggr_rsp_dict(rsp.op, args->dict, rsp_dict);
+ }
+ pthread_mutex_unlock(&args->lock_dict);
- op_ret = rsp.op_ret;
- op_errno = rsp.op_errno;
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_RESP_AGGR_FAIL, "%s",
+ "Failed to aggregate response from "
+ " node/brick");
+ if (!rsp.op_ret)
+ op_ret = ret;
+ else {
+ op_ret = rsp.op_ret;
+ op_errno = rsp.op_errno;
+ }
+ } else {
+ op_ret = rsp.op_ret;
+ op_errno = rsp.op_errno;
+ }
out:
+
gd_mgmt_v3_collate_errors(args, op_ret, op_errno, rsp.op_errstr,
GLUSTERD_MGMT_V3_BRICK_OP, *peerid, rsp.uuid);
if (rsp.op_errstr)
free(rsp.op_errstr);
- if (rsp.dict.dict_val)
- free(rsp.dict.dict_val);
+ if (rsp_dict)
+ dict_unref(rsp_dict);
+
GF_FREE(peerid);
/* req->rpc_status set to -1 means, STACK_DESTROY will be called from
* the caller function.
@@ -1250,15 +1337,21 @@ gd_mgmt_v3_brick_op_req(glusterd_op_t op, dict_t *op_ctx,
ret = dict_allocate_and_serialize(op_ctx, &req.dict.dict_val,
&req.dict.dict_len);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ GD_MSG_DICT_ALLOC_AND_SERL_LENGTH_GET_FAIL, NULL);
goto out;
+ }
gf_uuid_copy(req.uuid, my_uuid);
req.op = op;
GD_ALLOC_COPY_UUID(peerid, peerinfo->uuid, ret);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ GD_MSG_ALLOC_AND_COPY_UUID_FAIL, NULL);
goto out;
+ }
ret = gd_syncop_submit_request(peerinfo->rpc, &req, args, peerid,
&gd_mgmt_v3_prog, GLUSTERD_MGMT_V3_BRICK_OP,
@@ -1271,8 +1364,8 @@ out:
}
int
-glusterd_mgmt_v3_brick_op(glusterd_op_t op, dict_t *req_dict, char **op_errstr,
- uint32_t txn_generation)
+glusterd_mgmt_v3_brick_op(glusterd_op_t op, dict_t *op_ctx, dict_t *req_dict,
+ char **op_errstr, uint32_t txn_generation)
{
int32_t ret = -1;
int32_t peer_cnt = 0;
@@ -1319,16 +1412,28 @@ glusterd_mgmt_v3_brick_op(glusterd_op_t op, dict_t *req_dict, char **op_errstr,
}
goto out;
}
+ if (op == GD_OP_DEFRAG_BRICK_VOLUME || op == GD_OP_PROFILE_VOLUME) {
+ ret = glusterd_syncop_aggr_rsp_dict(op, op_ctx, rsp_dict);
+ if (ret) {
+ gf_log(this->name, GF_LOG_ERROR, "%s",
+ "Failed to aggregate response from "
+ " node/brick");
+ goto out;
+ }
+ }
dict_unref(rsp_dict);
rsp_dict = NULL;
/* Sending brick op req to other nodes in the cluster */
- gd_syncargs_init(&args, NULL);
- synctask_barrier_init((&args));
+ gd_syncargs_init(&args, op_ctx);
+ ret = synctask_barrier_init((&args));
+ if (ret)
+ goto out;
+
peer_cnt = 0;
- rcu_read_lock();
+ RCU_READ_LOCK;
cds_list_for_each_entry_rcu(peerinfo, &conf->peers, uuid_list)
{
/* Only send requests to peers who were available before the
@@ -1347,7 +1452,7 @@ glusterd_mgmt_v3_brick_op(glusterd_op_t op, dict_t *req_dict, char **op_errstr,
peer_uuid);
peer_cnt++;
}
- rcu_read_unlock();
+ RCU_READ_UNLOCK;
if (0 == peer_cnt) {
ret = 0;
@@ -1495,15 +1600,21 @@ gd_mgmt_v3_commit_req(glusterd_op_t op, dict_t *op_ctx,
ret = dict_allocate_and_serialize(op_ctx, &req.dict.dict_val,
&req.dict.dict_len);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ GD_MSG_DICT_ALLOC_AND_SERL_LENGTH_GET_FAIL, NULL);
goto out;
+ }
gf_uuid_copy(req.uuid, my_uuid);
req.op = op;
GD_ALLOC_COPY_UUID(peerid, peerinfo->uuid, ret);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ GD_MSG_ALLOC_AND_COPY_UUID_FAIL, NULL);
goto out;
+ }
ret = gd_syncop_submit_request(peerinfo->rpc, &req, args, peerid,
&gd_mgmt_v3_prog, GLUSTERD_MGMT_V3_COMMIT,
@@ -1528,7 +1639,6 @@ glusterd_mgmt_v3_commit(glusterd_op_t op, dict_t *op_ctx, dict_t *req_dict,
uuid_t peer_uuid = {0};
xlator_t *this = NULL;
glusterd_conf_t *conf = NULL;
- int32_t count = 0;
this = THIS;
GF_ASSERT(this);
@@ -1540,6 +1650,26 @@ glusterd_mgmt_v3_commit(glusterd_op_t op, dict_t *op_ctx, dict_t *req_dict,
GF_ASSERT(op_errstr);
GF_VALIDATE_OR_GOTO(this->name, op_errno, out);
+ switch (op) {
+ case GD_OP_REBALANCE:
+ case GD_OP_DEFRAG_BRICK_VOLUME:
+
+ ret = glusterd_set_rebalance_id_in_rsp_dict(req_dict, op_ctx);
+ if (ret) {
+ gf_log(this->name, GF_LOG_WARNING,
+ "Failed to set rebalance id in dict.");
+ }
+ break;
+ case GD_OP_REMOVE_BRICK:
+ ret = glusterd_set_rebalance_id_for_remove_brick(req_dict, op_ctx);
+ if (ret) {
+ gf_log(this->name, GF_LOG_WARNING,
+ "Failed to set rebalance id for remove-brick in dict.");
+ }
+ break;
+ default:
+ break;
+ }
rsp_dict = dict_new();
if (!rsp_dict) {
gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_CREATE_FAIL,
@@ -1582,10 +1712,12 @@ glusterd_mgmt_v3_commit(glusterd_op_t op, dict_t *op_ctx, dict_t *req_dict,
/* Sending commit req to other nodes in the cluster */
gd_syncargs_init(&args, op_ctx);
- synctask_barrier_init((&args));
+ ret = synctask_barrier_init((&args));
+ if (ret)
+ goto out;
peer_cnt = 0;
- rcu_read_lock();
+ RCU_READ_LOCK;
cds_list_for_each_entry_rcu(peerinfo, &conf->peers, uuid_list)
{
/* Only send requests to peers who were available before the
@@ -1593,22 +1725,9 @@ glusterd_mgmt_v3_commit(glusterd_op_t op, dict_t *op_ctx, dict_t *req_dict,
*/
if (peerinfo->generation > txn_generation)
continue;
-
- if (!peerinfo->connected) {
- if (op == GD_OP_TIER_STATUS || op == GD_OP_DETACH_TIER_STATUS) {
- ret = dict_get_int32n(args.dict, "count", SLEN("count"),
- &count);
- if (ret)
- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
- "failed to get index");
- count++;
- ret = dict_set_int32n(args.dict, "count", SLEN("count"), count);
- if (ret)
- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
- "failed to set index");
- }
+ if (!peerinfo->connected)
continue;
- }
+
if (op != GD_OP_SYNC_VOLUME &&
peerinfo->state.state != GD_FRIEND_STATE_BEFRIENDED)
continue;
@@ -1617,7 +1736,7 @@ glusterd_mgmt_v3_commit(glusterd_op_t op, dict_t *op_ctx, dict_t *req_dict,
peer_uuid);
peer_cnt++;
}
- rcu_read_unlock();
+ RCU_READ_UNLOCK;
if (0 == peer_cnt) {
ret = 0;
@@ -1647,6 +1766,274 @@ out:
}
+/*
+ * Completion callback for a GLUSTERD_MGMT_V3_POST_COMMIT request.
+ *
+ * Decodes the peer's reply, merges any dictionary it carried into the
+ * shared syncargs dictionary (under args->lock_dict), records the outcome
+ * through gd_mgmt_v3_collate_errors() and finally wakes the barrier the
+ * originator is waiting on.
+ *
+ * req     - RPC request this callback completes; rpc_status == -1 is
+ *           reported as ENOTCONN.
+ * iov     - encoded reply; a NULL iov is reported as EINVAL.
+ * count   - unused here.
+ * myframe - call frame whose ->local holds the syncargs and whose ->cookie
+ *           holds the heap-allocated peer uuid (freed here).
+ *
+ * Always returns 0.
+ */
int32_t
+gd_mgmt_v3_post_commit_cbk_fn(struct rpc_req *req, struct iovec *iov, int count,
+                              void *myframe)
+{
+    gd1_mgmt_v3_post_commit_rsp rsp = {
+        {0},
+    };
+    xlator_t *this = THIS;
+    call_frame_t *frame = NULL;
+    struct syncargs *sync_args = NULL;
+    uuid_t *peer_id = NULL;
+    dict_t *peer_rsp_dict = NULL;
+    int32_t ret = -1;
+    int32_t op_ret = -1;
+    int32_t op_errno = -1;
+
+    GF_ASSERT(this);
+    GF_ASSERT(req);
+    GF_ASSERT(myframe);
+
+    /* Detach the per-request state from the frame before doing anything
+     * that can fail. */
+    frame = myframe;
+    sync_args = frame->local;
+    peer_id = frame->cookie;
+    frame->local = NULL;
+    frame->cookie = NULL;
+
+    if (req->rpc_status == -1) {
+        op_errno = ENOTCONN;
+        goto out;
+    }
+
+    GF_VALIDATE_OR_GOTO_WITH_ERROR(this->name, iov, out, op_errno, EINVAL);
+
+    ret = xdr_to_generic(*iov, &rsp,
+                         (xdrproc_t)xdr_gd1_mgmt_v3_post_commit_rsp);
+    if (ret < 0)
+        goto out;
+
+    if (rsp.dict.dict_len) {
+        /* Unserialize the dictionary sent back by the peer */
+        peer_rsp_dict = dict_new();
+
+        ret = dict_unserialize(rsp.dict.dict_val, rsp.dict.dict_len,
+                               &peer_rsp_dict);
+        if (ret < 0) {
+            free(rsp.dict.dict_val);
+            goto out;
+        }
+
+        /* NOTE(review): presumably hands the raw buffer to the dict so it
+         * is released with it - confirm against dict.h */
+        peer_rsp_dict->extra_stdfree = rsp.dict.dict_val;
+    }
+
+    gf_uuid_copy(sync_args->uuid, rsp.uuid);
+
+    pthread_mutex_lock(&sync_args->lock_dict);
+    ret = glusterd_syncop_aggr_rsp_dict(rsp.op, sync_args->dict,
+                                        peer_rsp_dict);
+    pthread_mutex_unlock(&sync_args->lock_dict);
+
+    if (ret)
+        gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_RESP_AGGR_FAIL, "%s",
+               "Failed to aggregate response from "
+               " node/brick");
+
+    /* An aggregation failure only overrides the peer's verdict when the
+     * peer itself reported success. */
+    if (ret && !rsp.op_ret) {
+        op_ret = ret;
+    } else {
+        op_ret = rsp.op_ret;
+        op_errno = rsp.op_errno;
+    }
+
+out:
+    if (peer_rsp_dict)
+        dict_unref(peer_rsp_dict);
+
+    gd_mgmt_v3_collate_errors(sync_args, op_ret, op_errno, rsp.op_errstr,
+                              GLUSTERD_MGMT_V3_POST_COMMIT, *peer_id,
+                              rsp.uuid);
+    GF_FREE(peer_id);
+
+    free(rsp.op_errstr);
+
+    /* req->rpc_status set to -1 means, STACK_DESTROY will be called from
+     * the caller function.
+     */
+    if (req->rpc_status != -1)
+        STACK_DESTROY(frame->root);
+
+    synctask_barrier_wake(sync_args);
+    return 0;
+}
+
+/*
+ * RPC callback entry point for post-commit replies: forwards all arguments
+ * to glusterd_big_locked_cbk() with gd_mgmt_v3_post_commit_cbk_fn as the
+ * handler, and returns whatever that call returns.
+ */
+int32_t
+gd_mgmt_v3_post_commit_cbk(struct rpc_req *req, struct iovec *iov, int count,
+                           void *myframe)
+{
+    int32_t status;
+
+    status = glusterd_big_locked_cbk(req, iov, count, myframe,
+                                     gd_mgmt_v3_post_commit_cbk_fn);
+    return status;
+}
+
+/*
+ * Build and submit one GLUSTERD_MGMT_V3_POST_COMMIT request to a peer.
+ *
+ * op        - operation being post-committed; copied into the request.
+ * op_ctx    - dictionary serialized into the request payload.
+ * peerinfo  - target peer; its rpc handle is used for submission and its
+ *             uuid is copied to the heap and passed as the request cookie.
+ * args      - syncargs passed as the request's local data.
+ * my_uuid   - uuid copied into the request as the sender.
+ * recv_uuid - not used by this function.
+ *
+ * Returns the result of gd_syncop_submit_request(), or the non-zero status
+ * of whichever preparation step failed.
+ */
+int
+gd_mgmt_v3_post_commit_req(glusterd_op_t op, dict_t *op_ctx,
+                           glusterd_peerinfo_t *peerinfo, struct syncargs *args,
+                           uuid_t my_uuid, uuid_t recv_uuid)
+{
+    gd1_mgmt_v3_post_commit_req payload = {
+        {0},
+    };
+    xlator_t *this = THIS;
+    uuid_t *peer_id = NULL;
+    int32_t rc = -1;
+
+    GF_ASSERT(this);
+    GF_ASSERT(op_ctx);
+    GF_ASSERT(peerinfo);
+    GF_ASSERT(args);
+
+    rc = dict_allocate_and_serialize(op_ctx, &payload.dict.dict_val,
+                                     &payload.dict.dict_len);
+    if (rc) {
+        gf_smsg(this->name, GF_LOG_ERROR, errno,
+                GD_MSG_DICT_ALLOC_AND_SERL_LENGTH_GET_FAIL, NULL);
+        goto out;
+    }
+
+    payload.op = op;
+    gf_uuid_copy(payload.uuid, my_uuid);
+
+    /* The callback receives this copy as its cookie and frees it. */
+    GD_ALLOC_COPY_UUID(peer_id, peerinfo->uuid, rc);
+    if (rc) {
+        gf_smsg(this->name, GF_LOG_ERROR, errno,
+                GD_MSG_ALLOC_AND_COPY_UUID_FAIL, NULL);
+        goto out;
+    }
+
+    rc = gd_syncop_submit_request(peerinfo->rpc, &payload, args, peer_id,
+                                  &gd_mgmt_v3_prog,
+                                  GLUSTERD_MGMT_V3_POST_COMMIT,
+                                  gd_mgmt_v3_post_commit_cbk,
+                                  (xdrproc_t)xdr_gd1_mgmt_v3_post_commit_req);
+out:
+    /* The serialized payload is only needed until submission returns. */
+    GF_FREE(payload.dict.dict_val);
+    gf_msg_trace(this->name, 0, "Returning %d", rc);
+    return rc;
+}
+
+/*
+ * Run the post-commit phase of a mgmt_v3 transaction: first on the local
+ * node, then on the peers.
+ *
+ * op             - operation being processed.
+ * op_ctx         - operation context; the local response is aggregated into
+ *                  it and it is used as the shared dictionary for peer
+ *                  responses. glusterd_op_modify_op_ctx() is applied to it
+ *                  on every exit path.
+ * req_dict       - request dictionary handed to the local handler and sent
+ *                  to each peer.
+ * op_errstr      - out: error string. If the local step fails and none was
+ *                  set, a generic message is stored; if peers fail, a copy
+ *                  of the collated peer error string is stored.
+ * op_errno       - out: set from the collated peer errno once peers have
+ *                  answered. Must not be NULL.
+ * txn_generation - peers with a newer generation are skipped, as are
+ *                  disconnected peers and (except for GD_OP_SYNC_VOLUME)
+ *                  peers that are not in the BEFRIENDED state.
+ *
+ * Returns 0 on success (including when there is no peer to contact), or a
+ * non-zero value when the local step, the aggregation, the barrier setup or
+ * any peer fails.
+ */
+int
+glusterd_mgmt_v3_post_commit(glusterd_op_t op, dict_t *op_ctx, dict_t *req_dict,
+                             char **op_errstr, uint32_t *op_errno,
+                             uint32_t txn_generation)
+{
+    int32_t ret = -1;
+    int32_t peer_cnt = 0;
+    dict_t *rsp_dict = NULL;
+    glusterd_peerinfo_t *peerinfo = NULL;
+    struct syncargs args = {0};
+    uuid_t peer_uuid = {0};
+    xlator_t *this = NULL;
+    glusterd_conf_t *conf = NULL;
+
+    this = THIS;
+    GF_ASSERT(this);
+    conf = this->private;
+    GF_ASSERT(conf);
+
+    GF_ASSERT(op_ctx);
+    GF_ASSERT(req_dict);
+    GF_ASSERT(op_errstr);
+    GF_VALIDATE_OR_GOTO(this->name, op_errno, out);
+
+    rsp_dict = dict_new();
+    if (!rsp_dict) {
+        gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_CREATE_FAIL,
+               "Failed to create response dictionary");
+        goto out;
+    }
+
+    /* Post commit on local node */
+    ret = gd_mgmt_v3_post_commit_fn(op, req_dict, op_errstr, op_errno,
+                                    rsp_dict);
+
+    if (ret) {
+        gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_POST_COMMIT_OP_FAIL,
+               "Post commit failed for "
+               "operation %s on local node",
+               gd_op_list[op]);
+
+        if (*op_errstr == NULL) {
+            ret = gf_asprintf(op_errstr,
+                              "Post commit failed "
+                              "on localhost. Please "
+                              "check log file for details.");
+            if (ret == -1)
+                *op_errstr = NULL;
+
+            /* gf_asprintf() overwrote ret; restore the failure status */
+            ret = -1;
+        }
+        goto out;
+    }
+
+    ret = glusterd_syncop_aggr_rsp_dict(op, op_ctx, rsp_dict);
+    if (ret) {
+        gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_RESP_AGGR_FAIL, "%s",
+               "Failed to aggregate response from "
+               " node/brick");
+        goto out;
+    }
+
+    dict_unref(rsp_dict);
+    rsp_dict = NULL;
+
+    /* Sending post commit req to other nodes in the cluster */
+    gd_syncargs_init(&args, op_ctx);
+    ret = synctask_barrier_init((&args));
+    if (ret)
+        goto out;
+    peer_cnt = 0;
+
+    RCU_READ_LOCK;
+    cds_list_for_each_entry_rcu(peerinfo, &conf->peers, uuid_list)
+    {
+        /* Only send requests to peers who were available before the
+         * transaction started
+         */
+        if (peerinfo->generation > txn_generation)
+            continue;
+        if (!peerinfo->connected)
+            continue;
+
+        if (op != GD_OP_SYNC_VOLUME &&
+            peerinfo->state.state != GD_FRIEND_STATE_BEFRIENDED)
+            continue;
+
+        gd_mgmt_v3_post_commit_req(op, req_dict, peerinfo, &args, MY_UUID,
+                                   peer_uuid);
+        peer_cnt++;
+    }
+    RCU_READ_UNLOCK;
+
+    if (0 == peer_cnt) {
+        ret = 0;
+        goto out;
+    }
+
+    /* Block until every request counted above has been answered */
+    gd_synctask_barrier_wait((&args), peer_cnt);
+
+    if (args.op_ret) {
+        gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_POST_COMMIT_OP_FAIL,
+               "Post commit failed on peers");
+
+        if (args.errstr)
+            *op_errstr = gf_strdup(args.errstr);
+    }
+
+    ret = args.op_ret;
+    *op_errno = args.op_errno;
+
+    gf_msg_debug(this->name, 0,
+                 "Sent post commit req for %s to %d "
+                 "peers. Returning %d",
+                 gd_op_list[op], peer_cnt, ret);
+out:
+    /* The local-failure and aggregation-failure paths jump here while still
+     * holding the local response dictionary; release it so it is not
+     * leaked. It is NULL on every other path. */
+    if (rsp_dict)
+        dict_unref(rsp_dict);
+
+    glusterd_op_modify_op_ctx(op, op_ctx);
+    return ret;
+}
+
+int32_t
gd_mgmt_v3_post_validate_cbk_fn(struct rpc_req *req, struct iovec *iov,
int count, void *myframe)
{
@@ -1736,16 +2123,22 @@ gd_mgmt_v3_post_validate_req(glusterd_op_t op, int32_t op_ret, dict_t *op_ctx,
ret = dict_allocate_and_serialize(op_ctx, &req.dict.dict_val,
&req.dict.dict_len);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ GD_MSG_DICT_ALLOC_AND_SERL_LENGTH_GET_FAIL, NULL);
goto out;
+ }
gf_uuid_copy(req.uuid, my_uuid);
req.op = op;
req.op_ret = op_ret;
GD_ALLOC_COPY_UUID(peerid, peerinfo->uuid, ret);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ GD_MSG_ALLOC_AND_COPY_UUID_FAIL, NULL);
goto out;
+ }
ret = gd_syncop_submit_request(
peerinfo->rpc, &req, args, peerid, &gd_mgmt_v3_prog,
@@ -1787,14 +2180,6 @@ glusterd_mgmt_v3_post_validate(glusterd_op_t op, int32_t op_ret, dict_t *dict,
goto out;
}
- /* Copy the contents of dict like missed snaps info to req_dict */
- if (op != GD_OP_REMOVE_TIER_BRICK)
- /* dict and req_dict has the same values during remove tier
- * brick (detach start) So this rewrite make the remove brick
- * id to become empty.
- * Avoiding to copy it retains the value. */
- dict_copy(dict, req_dict);
-
/* Post Validation on local node */
ret = gd_mgmt_v3_post_validate_fn(op, op_ret, req_dict, op_errstr,
rsp_dict);
@@ -1823,10 +2208,13 @@ glusterd_mgmt_v3_post_validate(glusterd_op_t op, int32_t op_ret, dict_t *dict,
/* Sending Post Validation req to other nodes in the cluster */
gd_syncargs_init(&args, req_dict);
- synctask_barrier_init((&args));
+ ret = synctask_barrier_init((&args));
+ if (ret)
+ goto out;
+
peer_cnt = 0;
- rcu_read_lock();
+ RCU_READ_LOCK;
cds_list_for_each_entry_rcu(peerinfo, &conf->peers, uuid_list)
{
/* Only send requests to peers who were available before the
@@ -1845,7 +2233,7 @@ glusterd_mgmt_v3_post_validate(glusterd_op_t op, int32_t op_ret, dict_t *dict,
MY_UUID, peer_uuid);
peer_cnt++;
}
- rcu_read_unlock();
+ RCU_READ_UNLOCK;
if (0 == peer_cnt) {
ret = 0;
@@ -1957,15 +2345,21 @@ gd_mgmt_v3_unlock(glusterd_op_t op, dict_t *op_ctx,
ret = dict_allocate_and_serialize(op_ctx, &req.dict.dict_val,
&req.dict.dict_len);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ GD_MSG_DICT_ALLOC_AND_SERL_LENGTH_GET_FAIL, NULL);
goto out;
+ }
gf_uuid_copy(req.uuid, my_uuid);
req.op = op;
GD_ALLOC_COPY_UUID(peerid, peerinfo->uuid, ret);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ GD_MSG_ALLOC_AND_COPY_UUID_FAIL, NULL);
goto out;
+ }
ret = gd_syncop_submit_request(peerinfo->rpc, &req, args, peerid,
&gd_mgmt_v3_prog, GLUSTERD_MGMT_V3_UNLOCK,
@@ -2010,7 +2404,7 @@ glusterd_mgmt_v3_release_peer_locks(glusterd_op_t op, dict_t *dict,
if (ret)
goto out;
peer_cnt = 0;
- rcu_read_lock();
+ RCU_READ_LOCK;
cds_list_for_each_entry_rcu(peerinfo, &conf->peers, uuid_list)
{
/* Only send requests to peers who were available before the
@@ -2028,7 +2422,7 @@ glusterd_mgmt_v3_release_peer_locks(glusterd_op_t op, dict_t *dict,
gd_mgmt_v3_unlock(op, dict, peerinfo, &args, MY_UUID, peer_uuid);
peer_cnt++;
}
- rcu_read_unlock();
+ RCU_READ_UNLOCK;
if (0 == peer_cnt) {
ret = 0;
@@ -2057,6 +2451,173 @@ out:
}
+/*
+ * Drive a complete mgmt_v3 transaction that includes a brick-op phase.
+ *
+ * Phases, in order: lockdown, payload build, pre-validate, brick-op, commit,
+ * post-validate. The first failing phase jumps to the cleanup section, which
+ * releases peer locks, releases the local locks if they were acquired, and
+ * sends the CLI response (skipped for GD_OP_MAX_OPVERSION). No post-commit
+ * phase is run by this function.
+ *
+ * req  - CLI request the response is sent for.
+ * op   - operation to run.
+ * dict - operation dictionary; "originator_uuid" and "is_synctasked" are
+ *        added to it, and it is used as the response context.
+ *
+ * Always returns 0; the transaction result is only reported through the CLI
+ * response.
+ */
int32_t
+glusterd_mgmt_v3_initiate_all_phases_with_brickop_phase(rpcsvc_request_t *req,
+ glusterd_op_t op,
+ dict_t *dict)
+{
+ int32_t ret = -1;
+ int32_t op_ret = -1;
+ dict_t *req_dict = NULL;
+ dict_t *tmp_dict = NULL;
+ glusterd_conf_t *conf = NULL;
+ char *op_errstr = NULL;
+ xlator_t *this = NULL;
+ gf_boolean_t is_acquired = _gf_false;
+ uuid_t *originator_uuid = NULL;
+ uint32_t txn_generation = 0;
+ uint32_t op_errno = 0;
+
+ this = THIS;
+ GF_ASSERT(this);
+ GF_ASSERT(req);
+ GF_ASSERT(dict);
+ conf = this->private;
+ GF_ASSERT(conf);
+
+ /* Save the peer list generation */
+ txn_generation = conf->generation;
+ cmm_smp_rmb();
+ /* This read memory barrier makes sure that this assignment happens here
+ * only and is not reordered and optimized by either the compiler or the
+ * processor.
+ */
+
+ /* Save the MY_UUID as the originator_uuid. This originator_uuid
+ * will be used by is_origin_glusterd() to determine if a node
+ * is the originator node for a command. */
+ originator_uuid = GF_MALLOC(sizeof(uuid_t), gf_common_mt_uuid_t);
+ if (!originator_uuid) {
+ ret = -1;
+ goto out;
+ }
+
+ gf_uuid_copy(*originator_uuid, MY_UUID);
+ ret = dict_set_bin(dict, "originator_uuid", originator_uuid,
+ sizeof(uuid_t));
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Failed to set originator_uuid.");
+ /* The buffer is freed here only on failure; presumably the dict owns
+ * it once dict_set_bin() succeeds - TODO confirm. */
+ GF_FREE(originator_uuid);
+ goto out;
+ }
+
+ /* Marking the operation as complete synctasked */
+ ret = dict_set_int32(dict, "is_synctasked", _gf_true);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Failed to set synctasked flag.");
+ goto out;
+ }
+
+ /* Use a copy at local unlock as cli response will be sent before
+ * the unlock and the volname in the dict might be removed */
+ tmp_dict = dict_new();
+ if (!tmp_dict) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_CREATE_FAIL,
+ "Unable to create dict");
+ goto out;
+ }
+ dict_copy(dict, tmp_dict);
+
+ /* LOCKDOWN PHASE - Acquire mgmt_v3 locks */
+ ret = glusterd_mgmt_v3_initiate_lockdown(op, dict, &op_errstr, &op_errno,
+ &is_acquired, txn_generation);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_MGMTV3_LOCKDOWN_FAIL,
+ "mgmt_v3 lockdown failed.");
+ goto out;
+ }
+
+ /* BUILD PAYLOAD */
+ ret = glusterd_mgmt_v3_build_payload(&req_dict, &op_errstr, dict, op);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_MGMTV3_PAYLOAD_BUILD_FAIL,
+ LOGSTR_BUILD_PAYLOAD, gd_op_list[op]);
+ /* Supply a generic message only when the builder did not set one;
+ * ret keeps the builder's failure status. */
+ if (op_errstr == NULL)
+ gf_asprintf(&op_errstr, OPERRSTR_BUILD_PAYLOAD);
+ goto out;
+ }
+
+ /* PRE-COMMIT VALIDATE PHASE */
+ ret = glusterd_mgmt_v3_pre_validate(op, req_dict, &op_errstr, &op_errno,
+ txn_generation);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_PRE_VALIDATION_FAIL,
+ "Pre Validation Failed");
+ goto out;
+ }
+
+ /* BRICK-OPS */
+ ret = glusterd_mgmt_v3_brick_op(op, dict, req_dict, &op_errstr,
+ txn_generation);
+ if (ret) {
+ gf_log(this->name, GF_LOG_ERROR, "Brick Op Failed");
+ goto out;
+ }
+
+ /* COMMIT OP PHASE */
+ ret = glusterd_mgmt_v3_commit(op, dict, req_dict, &op_errstr, &op_errno,
+ txn_generation);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_COMMIT_OP_FAIL,
+ "Commit Op Failed");
+ goto out;
+ }
+
+ /* POST-COMMIT VALIDATE PHASE */
+ /* As of now, post_validate is not trying to cleanup any failed
+ commands. So as of now, I am sending 0 (op_ret as 0).
+ */
+ ret = glusterd_mgmt_v3_post_validate(op, 0, dict, req_dict, &op_errstr,
+ txn_generation);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_POST_VALIDATION_FAIL,
+ "Post Validation Failed");
+ goto out;
+ }
+
+ ret = 0;
+out:
+ /* Remember the transaction result; ret is reused by the unlock below. */
+ op_ret = ret;
+ /* UNLOCK PHASE FOR PEERS*/
+ (void)glusterd_mgmt_v3_release_peer_locks(op, dict, op_ret, &op_errstr,
+ is_acquired, txn_generation);
+
+ /* LOCAL VOLUME(S) UNLOCK */
+ if (is_acquired) {
+ /* Trying to release multiple mgmt_v3 locks */
+ ret = glusterd_multiple_mgmt_v3_unlock(tmp_dict, MY_UUID);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_MGMTV3_UNLOCK_FAIL,
+ "Failed to release mgmt_v3 locks on localhost");
+ op_ret = ret;
+ }
+ }
+
+ /* Report a generic internal error when a failure left no errno set. */
+ if (op_ret && (op_errno == 0))
+ op_errno = EG_INTRNL;
+
+ if (op != GD_OP_MAX_OPVERSION) {
+ /* SEND CLI RESPONSE */
+ glusterd_op_send_cli_response(op, op_ret, op_errno, req, dict,
+ op_errstr);
+ }
+
+ if (req_dict)
+ dict_unref(req_dict);
+
+ if (tmp_dict)
+ dict_unref(tmp_dict);
+
+ if (op_errstr) {
+ GF_FREE(op_errstr);
+ op_errstr = NULL;
+ }
+
+ /* The transaction result is not returned to the caller. */
+ return 0;
+}
+
+int32_t
glusterd_mgmt_v3_initiate_all_phases(rpcsvc_request_t *req, glusterd_op_t op,
dict_t *dict)
{
@@ -2161,6 +2722,15 @@ glusterd_mgmt_v3_initiate_all_phases(rpcsvc_request_t *req, glusterd_op_t op,
goto out;
}
+ /* POST COMMIT OP PHASE */
+ ret = glusterd_mgmt_v3_post_commit(op, dict, req_dict, &op_errstr,
+ &op_errno, txn_generation);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_POST_COMMIT_OP_FAIL,
+ "Post commit Op Failed");
+ goto out;
+ }
+
/* POST-COMMIT VALIDATE PHASE */
/* As of now, post_validate is not trying to cleanup any failed
commands. So as of now, I am sending 0 (op_ret as 0).
@@ -2398,7 +2968,8 @@ glusterd_mgmt_v3_initiate_snap_phases(rpcsvc_request_t *req, glusterd_op_t op,
goto out;
}
- ret = glusterd_mgmt_v3_brick_op(op, req_dict, &op_errstr, txn_generation);
+ ret = glusterd_mgmt_v3_brick_op(op, dict, req_dict, &op_errstr,
+ txn_generation);
if (ret) {
gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BRICK_OP_FAIL,
"Brick Ops Failed");
@@ -2458,7 +3029,8 @@ unbarrier:
goto out;
}
- ret = glusterd_mgmt_v3_brick_op(op, req_dict, &op_errstr, txn_generation);
+ ret = glusterd_mgmt_v3_brick_op(op, dict, req_dict, &op_errstr,
+ txn_generation);
if (ret) {
gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BRICK_OP_FAIL,
diff --git a/xlators/mgmt/glusterd/src/glusterd-mgmt.h b/xlators/mgmt/glusterd/src/glusterd-mgmt.h
index 9b57f4cb833..27dd1849519 100644
--- a/xlators/mgmt/glusterd/src/glusterd-mgmt.h
+++ b/xlators/mgmt/glusterd/src/glusterd-mgmt.h
@@ -28,6 +28,10 @@ gd_mgmt_v3_commit_fn(glusterd_op_t op, dict_t *dict, char **op_errstr,
uint32_t *op_errno, dict_t *rsp_dict);
int32_t
+gd_mgmt_v3_post_commit_fn(glusterd_op_t op, dict_t *dict, char **op_errstr,
+ uint32_t *op_errno, dict_t *rsp_dict);
+
+int32_t
gd_mgmt_v3_post_validate_fn(glusterd_op_t op, int32_t op_ret, dict_t *dict,
char **op_errstr, dict_t *rsp_dict);
@@ -36,6 +40,11 @@ glusterd_mgmt_v3_initiate_all_phases(rpcsvc_request_t *req, glusterd_op_t op,
dict_t *dict);
int32_t
+glusterd_mgmt_v3_initiate_all_phases_with_brickop_phase(rpcsvc_request_t *req,
+ glusterd_op_t op,
+ dict_t *dict);
+
+int32_t
glusterd_mgmt_v3_initiate_snap_phases(rpcsvc_request_t *req, glusterd_op_t op,
dict_t *dict);
@@ -79,4 +88,10 @@ glusterd_reset_brick_prevalidate(dict_t *dict, char **op_errstr,
dict_t *rsp_dict);
int
glusterd_op_reset_brick(dict_t *dict, dict_t *rsp_dict);
+
+int
+glusterd_post_commit_add_brick(dict_t *dict, char **op_errstr);
+
+int
+glusterd_post_commit_replace_brick(dict_t *dict, char **op_errstr);
#endif /* _GLUSTERD_MGMT_H_ */
diff --git a/xlators/mgmt/glusterd/src/glusterd-mountbroker.c b/xlators/mgmt/glusterd/src/glusterd-mountbroker.c
index 356a4bcca67..645d845ee76 100644
--- a/xlators/mgmt/glusterd/src/glusterd-mountbroker.c
+++ b/xlators/mgmt/glusterd/src/glusterd-mountbroker.c
@@ -11,21 +11,21 @@
#include <fnmatch.h>
#include <pwd.h>
-#include "globals.h"
-#include "glusterfs.h"
-#include "compat.h"
-#include "dict.h"
-#include "list.h"
-#include "logging.h"
-#include "syscall.h"
-#include "defaults.h"
-#include "compat.h"
-#include "compat-errno.h"
-#include "run.h"
+#include <glusterfs/globals.h>
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/compat.h>
+#include <glusterfs/dict.h>
+#include <glusterfs/list.h>
+#include <glusterfs/logging.h>
+#include <glusterfs/syscall.h>
+#include <glusterfs/defaults.h>
+#include <glusterfs/compat.h>
+#include <glusterfs/compat-errno.h>
+#include <glusterfs/run.h>
#include "glusterd-mem-types.h"
#include "glusterd.h"
#include "glusterd-utils.h"
-#include "common-utils.h"
+#include <glusterfs/common-utils.h>
#include "glusterd-mountbroker.h"
#include "glusterd-op-sm.h"
#include "glusterd-messages.h"
@@ -81,6 +81,7 @@ parse_mount_pattern_desc(gf_mount_spec_t *mspec, char *pdesc)
mspec->patterns = GF_CALLOC(mspec->len, sizeof(*mspec->patterns),
gf_gld_mt_mount_pattern);
if (!mspec->patterns) {
+ gf_smsg("glusterd", GF_LOG_ERROR, errno, GD_MSG_NO_MEMORY, NULL);
ret = -1;
goto out;
}
@@ -233,7 +234,7 @@ const char *georep_mnt_desc_template =
"user-map-root=%s "
")"
"SUB+("
- "log-file=" DEFAULT_LOG_FILE_DIRECTORY "/" GEOREP
+ "log-file=%s/" GEOREP
"*/* "
"log-level=* "
"volfile-id=* "
@@ -242,21 +243,9 @@ const char *georep_mnt_desc_template =
"%s"
")";
-const char *hadoop_mnt_desc_template =
- "SUP("
- "volfile-server=%s "
- "client-pid=%d "
- "volfile-id=%s "
- "user-map-root=%s "
- ")"
- "SUB+("
- "log-file=" DEFAULT_LOG_FILE_DIRECTORY "/" GHADOOP
- "*/* "
- "log-level=* "
- ")";
-
int
-make_georep_mountspec(gf_mount_spec_t *mspec, const char *volnames, char *user)
+make_georep_mountspec(gf_mount_spec_t *mspec, const char *volnames, char *user,
+ char *logdir)
{
char *georep_mnt_desc = NULL;
char *meetspec = NULL;
@@ -273,8 +262,11 @@ make_georep_mountspec(gf_mount_spec_t *mspec, const char *volnames, char *user)
int ret = 0;
vols = gf_strdup((char *)volnames);
- if (!vols)
+ if (!vols) {
+ gf_smsg(THIS->name, GF_LOG_ERROR, errno, GD_MSG_STRDUP_FAILED,
+ "Volume name=%s", volnames, NULL);
goto out;
+ }
for (vc = 1, p = vols; *p; p++) {
if (*p == ',')
@@ -282,8 +274,10 @@ make_georep_mountspec(gf_mount_spec_t *mspec, const char *volnames, char *user)
}
siz = strlen(volnames) + vc * SLEN("volfile-id=");
meetspec = GF_CALLOC(1, siz + 1, gf_gld_mt_georep_meet_spec);
- if (!meetspec)
+ if (!meetspec) {
+ gf_smsg(THIS->name, GF_LOG_ERROR, errno, GD_MSG_NO_MEMORY, NULL);
goto out;
+ }
for (p = vols;;) {
vol = strtok_r(p, ",", &savetok);
@@ -299,7 +293,7 @@ make_georep_mountspec(gf_mount_spec_t *mspec, const char *volnames, char *user)
}
ret = gf_asprintf(&georep_mnt_desc, georep_mnt_desc_template,
- GF_CLIENT_PID_GSYNCD, user, meetspec);
+ GF_CLIENT_PID_GSYNCD, user, logdir, meetspec);
if (ret == -1) {
georep_mnt_desc = NULL;
goto out;
@@ -322,21 +316,6 @@ out:
return ret;
}
-int
-make_ghadoop_mountspec(gf_mount_spec_t *mspec, const char *volname, char *user,
- char *server)
-{
- char *hadoop_mnt_desc = NULL;
- int ret = 0;
-
- ret = gf_asprintf(&hadoop_mnt_desc, hadoop_mnt_desc_template, server,
- GF_CLIENT_PID_HADOOP, volname, user);
- if (ret == -1)
- return ret;
-
- return parse_mount_pattern_desc(mspec, hadoop_mnt_desc);
-}
-
static gf_boolean_t
match_comp(char *str, char *patcomp)
{
diff --git a/xlators/mgmt/glusterd/src/glusterd-mountbroker.h b/xlators/mgmt/glusterd/src/glusterd-mountbroker.h
index 319e05188b4..20c1347f52f 100644
--- a/xlators/mgmt/glusterd/src/glusterd-mountbroker.h
+++ b/xlators/mgmt/glusterd/src/glusterd-mountbroker.h
@@ -30,10 +30,8 @@ int
parse_mount_pattern_desc(gf_mount_spec_t *mspec, char *pdesc);
int
-make_georep_mountspec(gf_mount_spec_t *mspec, const char *volname, char *user);
-int
-make_ghadoop_mountspec(gf_mount_spec_t *mspec, const char *volname, char *user,
- char *server);
+make_georep_mountspec(gf_mount_spec_t *mspec, const char *volname, char *user,
+ char *logdir);
int
glusterd_do_mount(char *label, dict_t *argdict, char **path, int *op_errno);
diff --git a/xlators/mgmt/glusterd/src/glusterd-nfs-svc.c b/xlators/mgmt/glusterd/src/glusterd-nfs-svc.c
index 9bc46a9bb63..4908dbbc213 100644
--- a/xlators/mgmt/glusterd/src/glusterd-nfs-svc.c
+++ b/xlators/mgmt/glusterd/src/glusterd-nfs-svc.c
@@ -8,9 +8,11 @@
cases as published by the Free Software Foundation.
*/
-#include "globals.h"
-#include "run.h"
-#include "syscall.h"
+#ifdef BUILD_GNFS
+
+#include <glusterfs/globals.h>
+#include <glusterfs/run.h>
+#include <glusterfs/syscall.h>
#include "glusterd.h"
#include "glusterd-utils.h"
#include "glusterd-volgen.h"
@@ -223,3 +225,4 @@ out:
gf_msg_debug(this ? this->name : "glusterd", 0, "Returning %d", ret);
return ret;
}
+#endif
diff --git a/xlators/mgmt/glusterd/src/glusterd-nfs-svc.h b/xlators/mgmt/glusterd/src/glusterd-nfs-svc.h
index 47e89830f55..6bfdde95749 100644
--- a/xlators/mgmt/glusterd/src/glusterd-nfs-svc.h
+++ b/xlators/mgmt/glusterd/src/glusterd-nfs-svc.h
@@ -13,6 +13,7 @@
#include "glusterd-svc-mgmt.h"
+#ifdef BUILD_GNFS
void
glusterd_nfssvc_build(glusterd_svc_t *svc);
@@ -22,4 +23,5 @@ glusterd_nfssvc_init(glusterd_svc_t *svc);
int
glusterd_nfssvc_reconfigure();
+#endif /* BUILD_GNFS */
#endif
diff --git a/xlators/mgmt/glusterd/src/glusterd-op-sm.c b/xlators/mgmt/glusterd/src/glusterd-op-sm.c
index e72bec4e55c..c537fc33a85 100644
--- a/xlators/mgmt/glusterd/src/glusterd-op-sm.c
+++ b/xlators/mgmt/glusterd/src/glusterd-op-sm.c
@@ -14,46 +14,40 @@
#include <sys/mount.h>
#include <libgen.h>
-#include "compat-uuid.h"
+#include <glusterfs/compat-uuid.h>
#include "fnmatch.h"
-#include "xlator.h"
+#include <glusterfs/xlator.h>
#include "protocol-common.h"
#include "glusterd.h"
-#include "call-stub.h"
-#include "defaults.h"
-#include "list.h"
-#include "dict.h"
-#include "compat.h"
-#include "compat-errno.h"
-#include "statedump.h"
-#include "glusterd-sm.h"
+#include <glusterfs/call-stub.h>
+#include <glusterfs/list.h>
+#include <glusterfs/dict.h>
+#include <glusterfs/compat.h>
+#include <glusterfs/compat-errno.h>
+#include <glusterfs/statedump.h>
#include "glusterd-op-sm.h"
#include "glusterd-utils.h"
#include "glusterd-store.h"
-#include "glusterd-hooks.h"
-#include "glusterd-volgen.h"
#include "glusterd-locks.h"
-#include "glusterd-messages.h"
-#include "glusterd-utils.h"
#include "glusterd-quota.h"
-#include "syscall.h"
+#include <glusterfs/syscall.h>
#include "cli1-xdr.h"
-#include "common-utils.h"
-#include "run.h"
#include "glusterd-snapshot-utils.h"
#include "glusterd-svc-mgmt.h"
#include "glusterd-svc-helper.h"
+#include "glusterd-shd-svc-helper.h"
#include "glusterd-shd-svc.h"
-#include "glusterd-nfs-svc.h"
#include "glusterd-quotad-svc.h"
#include "glusterd-server-quorum.h"
-#include "glusterd-volgen.h"
#include <sys/types.h>
#include <signal.h>
#include <sys/wait.h>
#include "glusterd-gfproxyd-svc-helper.h"
+/* Evaluates to non-zero when key, whose length is len, equals str. The
+ * length is compared first, so strcmp() only runs for keys of matching
+ * length. str must be acceptable to SLEN() (presumably a string literal -
+ * confirm against its definition). Arguments are parenthesized so that
+ * expression arguments expand with the intended precedence. */
+#define len_strcmp(key, len, str)                                              \
+    (((len) == SLEN(str)) && (strcmp((key), (str)) == 0))
+
extern char local_node_hostname[PATH_MAX];
static int
glusterd_set_shared_storage(dict_t *dict, char *key, char *value,
@@ -67,7 +61,7 @@ glusterd_set_shared_storage(dict_t *dict, char *key, char *value,
* It's important that every value have a default, or have a special handler
* in glusterd_get_global_options_for_all_vols, or else we might crash there.
*/
-glusterd_all_vol_opts valid_all_vol_opts[] = {
+const glusterd_all_vol_opts valid_all_vol_opts[] = {
{GLUSTERD_QUORUM_RATIO_KEY, "51"},
{GLUSTERD_SHARED_STORAGE_KEY, "disable"},
/* This one actually gets filled in dynamically. */
@@ -85,7 +79,8 @@ glusterd_all_vol_opts valid_all_vol_opts[] = {
* can be attached per process.
* TBD: Discuss the default value for this. Maybe this should be a
* dynamic value depending on the memory specifications per node */
- {GLUSTERD_BRICKMUX_LIMIT_KEY, "0"},
+ {GLUSTERD_BRICKMUX_LIMIT_KEY, GLUSTERD_BRICKMUX_LIMIT_DFLT_VALUE},
+ {GLUSTERD_VOL_CNT_PER_THRD, GLUSTERD_VOL_CNT_PER_THRD_DEFAULT_VALUE},
{GLUSTERD_LOCALTIME_LOGGING_KEY, "disable"},
{GLUSTERD_DAEMON_LOG_LEVEL_KEY, "INFO"},
{NULL},
@@ -97,14 +92,6 @@ glusterd_op_info_t opinfo = {
{0},
};
-int
-glusterd_bricks_select_rebalance_volume(dict_t *dict, char **op_errstr,
- struct cds_list_head *selected);
-
-int
-glusterd_bricks_select_tier_volume(dict_t *dict, char **op_errstr,
- struct cds_list_head *selected);
-
int32_t
glusterd_txn_opinfo_dict_init()
{
@@ -119,6 +106,7 @@ glusterd_txn_opinfo_dict_init()
priv->glusterd_txn_opinfo = dict_new();
if (!priv->glusterd_txn_opinfo) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL);
ret = -1;
goto out;
}
@@ -191,8 +179,10 @@ glusterd_generate_txn_id(dict_t *dict, uuid_t **txn_id)
GF_ASSERT(dict);
*txn_id = GF_MALLOC(sizeof(uuid_t), gf_common_mt_uuid_t);
- if (!*txn_id)
+ if (!*txn_id) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_NO_MEMORY, NULL);
goto out;
+ }
if (priv->op_version < GD_OP_VERSION_3_6_0)
gf_uuid_copy(**txn_id, priv->global_txn_id);
@@ -403,7 +393,7 @@ glusterd_op_sm_event_name_get(int event)
return glusterd_op_sm_event_names[event];
}
-void
+static void
glusterd_destroy_lock_ctx(glusterd_op_lock_ctx_t *ctx)
{
if (!ctx)
@@ -422,56 +412,49 @@ glusterd_set_volume_status(glusterd_volinfo_t *volinfo,
static int
glusterd_op_sm_inject_all_acc(uuid_t *txn_id)
{
- int32_t ret = -1;
+ int ret = -1;
ret = glusterd_op_sm_inject_event(GD_OP_EVENT_ALL_ACC, txn_id, NULL);
gf_msg_debug("glusterd", 0, "Returning %d", ret);
return ret;
}
static int
-glusterd_check_bitrot_cmd(char *key, char *value, char *errstr, size_t size)
+glusterd_check_bitrot_cmd(char *key, const int keylen, char *errstr,
+ const size_t size)
{
int ret = -1;
- if ((!strncmp(key, "bitrot", SLEN("bitrot"))) ||
- (!strncmp(key, "features.bitrot", SLEN("features.bitrot")))) {
+ if (len_strcmp(key, keylen, "bitrot") ||
+ len_strcmp(key, keylen, "features.bitrot")) {
snprintf(errstr, size,
- " 'gluster volume set <VOLNAME> %s' "
- "is invalid command. Use 'gluster volume bitrot "
- "<VOLNAME> {enable|disable}' instead.",
+ " 'gluster volume set <VOLNAME> %s' is invalid command."
+ " Use 'gluster volume bitrot <VOLNAME> {enable|disable}'"
+ " instead.",
key);
- ret = -1;
goto out;
- } else if ((!strncmp(key, "scrub-freq", SLEN("scrub-freq"))) ||
- (!strncmp(key, "features.scrub-freq",
- SLEN("features.scrub-freq")))) {
+ } else if (len_strcmp(key, keylen, "scrub-freq") ||
+ len_strcmp(key, keylen, "features.scrub-freq")) {
snprintf(errstr, size,
- " 'gluster volume "
- "set <VOLNAME> %s' is invalid command. Use 'gluster "
- "volume bitrot <VOLNAME> scrub-frequency"
+ " 'gluster volume set <VOLNAME> %s' is invalid command."
+ " Use 'gluster volume bitrot <VOLNAME> scrub-frequency"
" {hourly|daily|weekly|biweekly|monthly}' instead.",
key);
- ret = -1;
goto out;
- } else if ((!strncmp(key, "scrub", SLEN("scrub"))) ||
- (!strncmp(key, "features.scrub", SLEN("features.scrub")))) {
+ } else if (len_strcmp(key, keylen, "scrub") ||
+ len_strcmp(key, keylen, "features.scrub")) {
snprintf(errstr, size,
- " 'gluster volume set <VOLNAME> %s' is "
- "invalid command. Use 'gluster volume bitrot "
- "<VOLNAME> scrub {pause|resume}' instead.",
+ " 'gluster volume set <VOLNAME> %s' is invalid command."
+ " Use 'gluster volume bitrot <VOLNAME> scrub {pause|resume}'"
+ " instead.",
key);
- ret = -1;
goto out;
- } else if ((!strncmp(key, "scrub-throttle", SLEN("scrub-throttle"))) ||
- (!strncmp(key, "features.scrub-throttle",
- SLEN("features.scrub-throttle")))) {
+ } else if (len_strcmp(key, keylen, "scrub-throttle") ||
+ len_strcmp(key, keylen, "features.scrub-throttle")) {
snprintf(errstr, size,
- " 'gluster volume set <VOLNAME> %s' is "
- "invalid command. Use 'gluster volume bitrot "
- "<VOLNAME> scrub-throttle {lazy|normal|aggressive}' "
- "instead.",
+ " 'gluster volume set <VOLNAME> %s' is invalid command."
+ " Use 'gluster volume bitrot <VOLNAME> scrub-throttle "
+ " {lazy|normal|aggressive}' instead.",
key);
- ret = -1;
goto out;
}
@@ -481,61 +464,52 @@ out:
}
static int
-glusterd_check_quota_cmd(char *key, char *value, char *errstr, size_t size)
+glusterd_check_quota_cmd(char *key, const int keylen, char *value, char *errstr,
+ size_t size)
{
int ret = -1;
gf_boolean_t b = _gf_false;
- if ((strcmp(key, "quota") == 0) || (strcmp(key, "features.quota") == 0)) {
+ if (len_strcmp(key, keylen, "quota") ||
+ len_strcmp(key, keylen, "features.quota")) {
ret = gf_string2boolean(value, &b);
if (ret)
goto out;
+ ret = -1;
if (b) {
snprintf(errstr, size,
- " 'gluster "
- "volume set <VOLNAME> %s %s' is "
- "deprecated. Use 'gluster volume "
- "quota <VOLNAME> enable' instead.",
+ " 'gluster volume set <VOLNAME> %s %s' is deprecated."
+ " Use 'gluster volume quota <VOLNAME> enable' instead.",
key, value);
- ret = -1;
- goto out;
} else {
snprintf(errstr, size,
- " 'gluster "
- "volume set <VOLNAME> %s %s' is "
- "deprecated. Use 'gluster volume "
- "quota <VOLNAME> disable' instead.",
+ " 'gluster volume set <VOLNAME> %s %s' is deprecated."
+ " Use 'gluster volume quota <VOLNAME> disable' instead.",
key, value);
- ret = -1;
- goto out;
}
- } else if ((strcmp(key, "inode-quota") == 0) ||
- (strcmp(key, "features.inode-quota") == 0)) {
+ goto out;
+ } else if (len_strcmp(key, keylen, "inode-quota") ||
+ len_strcmp(key, keylen, "features.inode-quota")) {
ret = gf_string2boolean(value, &b);
if (ret)
goto out;
+ ret = -1;
if (b) {
- snprintf(errstr, size,
- " 'gluster "
- "volume set <VOLNAME> %s %s' is "
- "deprecated. Use 'gluster volume "
- "inode-quota <VOLNAME> enable' instead.",
- key, value);
- ret = -1;
- goto out;
+ snprintf(
+ errstr, size,
+ " 'gluster volume set <VOLNAME> %s %s' is deprecated."
+ " Use 'gluster volume inode-quota <VOLNAME> enable' instead.",
+ key, value);
} else {
/* inode-quota disable not supported,
* use quota disable
*/
snprintf(errstr, size,
- " 'gluster "
- "volume set <VOLNAME> %s %s' is "
- "deprecated. Use 'gluster volume "
- "quota <VOLNAME> disable' instead.",
+ " 'gluster volume set <VOLNAME> %s %s' is deprecated."
+ " Use 'gluster volume quota <VOLNAME> disable' instead.",
key, value);
- ret = -1;
- goto out;
}
+ goto out;
}
ret = 0;
@@ -570,8 +544,11 @@ glusterd_brick_op_build_payload(glusterd_op_t op,
case GD_OP_STOP_VOLUME:
brick_req = GF_CALLOC(1, sizeof(*brick_req),
gf_gld_mt_mop_brick_req_t);
- if (!brick_req)
+ if (!brick_req) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_NO_MEMORY,
+ NULL);
goto out;
+ }
brick_req->op = GLUSTERD_BRICK_TERMINATE;
brick_req->name = brickinfo->path;
glusterd_set_brick_status(brickinfo, GF_BRICK_STOPPING);
@@ -580,8 +557,11 @@ glusterd_brick_op_build_payload(glusterd_op_t op,
brick_req = GF_CALLOC(1, sizeof(*brick_req),
gf_gld_mt_mop_brick_req_t);
- if (!brick_req)
+ if (!brick_req) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_NO_MEMORY,
+ NULL);
goto out;
+ }
brick_req->op = GLUSTERD_BRICK_XLATOR_INFO;
brick_req->name = brickinfo->path;
@@ -590,51 +570,70 @@ glusterd_brick_op_build_payload(glusterd_op_t op,
case GD_OP_HEAL_VOLUME: {
brick_req = GF_CALLOC(1, sizeof(*brick_req),
gf_gld_mt_mop_brick_req_t);
- if (!brick_req)
+ if (!brick_req) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_NO_MEMORY,
+ NULL);
goto out;
+ }
brick_req->op = GLUSTERD_BRICK_XLATOR_OP;
brick_req->name = "";
ret = dict_get_int32n(dict, "heal-op", SLEN("heal-op"),
(int32_t *)&heal_op);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
+ "Key=heal-op", NULL);
goto out;
+ }
ret = dict_set_int32n(dict, "xl-op", SLEN("xl-op"), heal_op);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=xl-op", NULL);
goto out;
+ }
} break;
case GD_OP_STATUS_VOLUME: {
brick_req = GF_CALLOC(1, sizeof(*brick_req),
gf_gld_mt_mop_brick_req_t);
- if (!brick_req)
+ if (!brick_req) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_NO_MEMORY,
+ NULL);
goto out;
+ }
brick_req->op = GLUSTERD_BRICK_STATUS;
brick_req->name = "";
ret = dict_set_strn(dict, "brick-name", SLEN("brick-name"),
brickinfo->path);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=brick-name", NULL);
goto out;
+ }
} break;
case GD_OP_REBALANCE:
- case GD_OP_DETACH_TIER_STATUS:
- case GD_OP_TIER_STATUS:
case GD_OP_DEFRAG_BRICK_VOLUME:
brick_req = GF_CALLOC(1, sizeof(*brick_req),
gf_gld_mt_mop_brick_req_t);
- if (!brick_req)
+ if (!brick_req) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_NO_MEMORY,
+ NULL);
goto out;
+ }
brick_req->op = GLUSTERD_BRICK_XLATOR_DEFRAG;
ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
+ "Key=volname", NULL);
goto out;
+ }
ret = glusterd_volinfo_find(volname, &volinfo);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ GD_MSG_VOLINFO_GET_FAIL, "Volume=%s", volname, NULL);
goto out;
- if (volinfo->type == GF_CLUSTER_TYPE_TIER)
- snprintf(name, sizeof(name), "%s-tier-dht", volname);
- else
- snprintf(name, sizeof(name), "%s-dht", volname);
+ }
+ snprintf(name, sizeof(name), "%s-dht", volname);
brick_req->name = gf_strdup(name);
break;
@@ -642,8 +641,11 @@ glusterd_brick_op_build_payload(glusterd_op_t op,
case GD_OP_BARRIER:
brick_req = GF_CALLOC(1, sizeof(*brick_req),
gf_gld_mt_mop_brick_req_t);
- if (!brick_req)
+ if (!brick_req) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_NO_MEMORY,
+ NULL);
goto out;
+ }
brick_req->op = GLUSTERD_BRICK_BARRIER;
brick_req->name = brickinfo->path;
break;
@@ -653,10 +655,15 @@ glusterd_brick_op_build_payload(glusterd_op_t op,
break;
}
+ brick_req->dict.dict_len = 0;
+ brick_req->dict.dict_val = NULL;
ret = dict_allocate_and_serialize(dict, &brick_req->input.input_val,
&brick_req->input.input_len);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ GD_MSG_DICT_ALLOC_AND_SERL_LENGTH_GET_FAIL, NULL);
goto out;
+ }
*req = brick_req;
ret = 0;
@@ -678,13 +685,19 @@ glusterd_node_op_build_payload(glusterd_op_t op, gd1_mgmt_brick_op_req **req,
GF_ASSERT(op < GD_OP_MAX);
GF_ASSERT(op > GD_OP_NONE);
GF_ASSERT(req);
+ xlator_t *this = NULL;
+ this = THIS;
+ GF_ASSERT(this);
switch (op) {
case GD_OP_PROFILE_VOLUME:
brick_req = GF_CALLOC(1, sizeof(*brick_req),
gf_gld_mt_mop_brick_req_t);
- if (!brick_req)
+ if (!brick_req) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_NO_MEMORY,
+ NULL);
goto out;
+ }
brick_req->op = GLUSTERD_NODE_PROFILE;
brick_req->name = "";
@@ -694,8 +707,11 @@ glusterd_node_op_build_payload(glusterd_op_t op, gd1_mgmt_brick_op_req **req,
case GD_OP_STATUS_VOLUME:
brick_req = GF_CALLOC(1, sizeof(*brick_req),
gf_gld_mt_mop_brick_req_t);
- if (!brick_req)
+ if (!brick_req) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_NO_MEMORY,
+ NULL);
goto out;
+ }
brick_req->op = GLUSTERD_NODE_STATUS;
brick_req->name = "";
@@ -706,14 +722,20 @@ glusterd_node_op_build_payload(glusterd_op_t op, gd1_mgmt_brick_op_req **req,
case GD_OP_SCRUB_ONDEMAND:
brick_req = GF_CALLOC(1, sizeof(*brick_req),
gf_gld_mt_mop_brick_req_t);
- if (!brick_req)
+ if (!brick_req) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_NO_MEMORY,
+ NULL);
goto out;
+ }
brick_req->op = GLUSTERD_NODE_BITROT;
ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
+ "Key=volname", NULL);
goto out;
+ }
brick_req->name = gf_strdup(volname);
break;
@@ -721,11 +743,16 @@ glusterd_node_op_build_payload(glusterd_op_t op, gd1_mgmt_brick_op_req **req,
goto out;
}
+ brick_req->dict.dict_len = 0;
+ brick_req->dict.dict_val = NULL;
ret = dict_allocate_and_serialize(dict, &brick_req->input.input_val,
&brick_req->input.input_len);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ GD_MSG_DICT_ALLOC_AND_SERL_LENGTH_GET_FAIL, NULL);
goto out;
+ }
*req = brick_req;
ret = 0;
@@ -733,7 +760,7 @@ glusterd_node_op_build_payload(glusterd_op_t op, gd1_mgmt_brick_op_req **req,
out:
if (ret && brick_req)
GF_FREE(brick_req);
- gf_msg_debug(THIS->name, 0, "Returning %d", ret);
+ gf_msg_debug(this->name, 0, "Returning %d", ret);
return ret;
}
@@ -749,12 +776,14 @@ glusterd_validate_quorum_options(xlator_t *this, char *fullkey, char *value,
goto out;
key = strchr(fullkey, '.');
if (key == NULL) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_STRCHR_FAIL, NULL);
ret = -1;
goto out;
}
key++;
opt = xlator_volume_option_get(this, key);
if (!opt) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_VOLINFO_GET_FAIL, NULL);
ret = -1;
goto out;
}
@@ -775,16 +804,16 @@ glusterd_validate_brick_mx_options(xlator_t *this, char *fullkey, char *value,
}
static int
-glusterd_validate_shared_storage(char *key, char *value, char *errstr)
+glusterd_validate_shared_storage(char *value, char *errstr)
{
int32_t ret = -1;
- int32_t exists = -1;
int32_t count = -1;
char *op = NULL;
char hook_script[PATH_MAX] = "";
xlator_t *this = NULL;
glusterd_conf_t *conf = NULL;
int32_t len = 0;
+ glusterd_volinfo_t *volinfo = NULL;
this = THIS;
GF_VALIDATE_OR_GOTO("glusterd", this, out);
@@ -792,16 +821,9 @@ glusterd_validate_shared_storage(char *key, char *value, char *errstr)
conf = this->private;
GF_VALIDATE_OR_GOTO(this->name, conf, out);
- GF_VALIDATE_OR_GOTO(this->name, key, out);
GF_VALIDATE_OR_GOTO(this->name, value, out);
GF_VALIDATE_OR_GOTO(this->name, errstr, out);
- ret = 0;
-
- if (strcmp(key, GLUSTERD_SHARED_STORAGE_KEY)) {
- goto out;
- }
-
if ((strcmp(value, "enable")) && (strcmp(value, "disable"))) {
snprintf(errstr, PATH_MAX,
"Invalid option(%s). Valid options "
@@ -852,8 +874,8 @@ glusterd_validate_shared_storage(char *key, char *value, char *errstr)
goto out;
}
- exists = glusterd_check_volume_exists(GLUSTER_SHARED_STORAGE);
- if (exists) {
+ ret = glusterd_volinfo_find(GLUSTER_SHARED_STORAGE, &volinfo);
+ if (!ret) {
snprintf(errstr, PATH_MAX,
"Shared storage volume(" GLUSTER_SHARED_STORAGE
") already exists.");
@@ -887,7 +909,7 @@ out:
}
static int
-glusterd_validate_localtime_logging(char *key, char *value, char *errstr)
+glusterd_validate_localtime_logging(char *value, char *errstr)
{
int32_t ret = -1;
xlator_t *this = NULL;
@@ -899,29 +921,11 @@ glusterd_validate_localtime_logging(char *key, char *value, char *errstr)
conf = this->private;
GF_VALIDATE_OR_GOTO(this->name, conf, out);
-
- GF_VALIDATE_OR_GOTO(this->name, key, out);
GF_VALIDATE_OR_GOTO(this->name, value, out);
- GF_VALIDATE_OR_GOTO(this->name, errstr, out);
-
- ret = 0;
-
- if (strcmp(key, GLUSTERD_LOCALTIME_LOGGING_KEY)) {
- goto out;
- }
-
- if ((strcmp(value, "enable")) && (strcmp(value, "disable"))) {
- snprintf(errstr, PATH_MAX,
- "Invalid option(%s). Valid options "
- "are 'enable' and 'disable'",
- value);
- gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_INVALID_ENTRY, "%s",
- errstr);
- ret = -1;
- }
already_enabled = gf_log_get_localtime();
+ ret = 0;
if (strcmp(value, "enable") == 0) {
gf_log_set_localtime(1);
if (!already_enabled)
@@ -932,6 +936,15 @@ glusterd_validate_localtime_logging(char *key, char *value, char *errstr)
if (already_enabled)
gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_LOCALTIME_LOGGING_DISABLE,
"localtime logging disable");
+ } else {
+ ret = -1;
+ GF_VALIDATE_OR_GOTO(this->name, errstr, out);
+ snprintf(errstr, PATH_MAX,
+ "Invalid option(%s). Valid options "
+ "are 'enable' and 'disable'",
+ value);
+ gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_INVALID_ENTRY, "%s",
+ errstr);
}
out:
@@ -939,7 +952,7 @@ out:
}
static int
-glusterd_validate_daemon_log_level(char *key, char *value, char *errstr)
+glusterd_validate_daemon_log_level(char *value, char *errstr)
{
int32_t ret = -1;
xlator_t *this = NULL;
@@ -951,19 +964,15 @@ glusterd_validate_daemon_log_level(char *key, char *value, char *errstr)
conf = this->private;
GF_VALIDATE_OR_GOTO(this->name, conf, out);
- GF_VALIDATE_OR_GOTO(this->name, key, out);
GF_VALIDATE_OR_GOTO(this->name, value, out);
- GF_VALIDATE_OR_GOTO(this->name, errstr, out);
ret = 0;
- if (strcmp(key, GLUSTERD_DAEMON_LOG_LEVEL_KEY)) {
- goto out;
- }
-
if ((strcmp(value, "INFO")) && (strcmp(value, "WARNING")) &&
(strcmp(value, "DEBUG")) && (strcmp(value, "TRACE")) &&
(strcmp(value, "ERROR"))) {
+ ret = -1;
+ GF_VALIDATE_OR_GOTO(this->name, errstr, out);
snprintf(errstr, PATH_MAX,
"Invalid option(%s). Valid options "
"are 'INFO' or 'WARNING' or 'ERROR' or 'DEBUG' or "
@@ -971,7 +980,6 @@ glusterd_validate_daemon_log_level(char *key, char *value, char *errstr)
value);
gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_INVALID_ENTRY, "%s",
errstr);
- ret = -1;
}
out:
@@ -991,6 +999,7 @@ glusterd_op_stage_set_volume(dict_t *dict, char **op_errstr)
char keystr[100] = {
0,
};
+ int keystr_len;
int keylen;
char *trash_path = NULL;
int trash_path_len = 0;
@@ -1003,6 +1012,7 @@ glusterd_op_stage_set_volume(dict_t *dict, char **op_errstr)
glusterd_brickinfo_t *brickinfo = NULL;
dict_t *val_dict = NULL;
gf_boolean_t global_opt = _gf_false;
+ gf_boolean_t key_matched = _gf_false; /* if a key was processed or not*/
glusterd_volinfo_t *voliter = NULL;
glusterd_conf_t *priv = NULL;
xlator_t *this = NULL;
@@ -1015,6 +1025,7 @@ glusterd_op_stage_set_volume(dict_t *dict, char **op_errstr)
gf_boolean_t check_op_version = _gf_true;
gf_boolean_t trash_enabled = _gf_false;
gf_boolean_t all_vol = _gf_false;
+ struct volopt_map_entry *vmep = NULL;
GF_ASSERT(dict);
this = THIS;
@@ -1022,10 +1033,6 @@ glusterd_op_stage_set_volume(dict_t *dict, char **op_errstr)
priv = this->private;
GF_ASSERT(priv);
- val_dict = dict_new();
- if (!val_dict)
- goto out;
-
/* Check if we can support the required op-version
* This check is not done on the originator glusterd. The originator
* glusterd sets this value.
@@ -1040,8 +1047,8 @@ glusterd_op_stage_set_volume(dict_t *dict, char **op_errstr)
if (check_op_version) {
ret = dict_get_uint32(dict, "new-op-version", &new_op_version);
if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
- "Failed to get new_op_version");
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Key=new-op-version", NULL);
goto out;
}
@@ -1049,9 +1056,8 @@ glusterd_op_stage_set_volume(dict_t *dict, char **op_errstr)
(new_op_version < GD_OP_VERSION_MIN)) {
ret = -1;
snprintf(errstr, sizeof(errstr),
- "Required op_version (%d) is not "
- "supported. Max supported op version "
- "is %d",
+ "Required op_version (%d) is not supported."
+ " Max supported op version is %d",
new_op_version, priv->op_version);
gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_UNSUPPORTED_VERSION,
"%s", errstr);
@@ -1060,7 +1066,7 @@ glusterd_op_stage_set_volume(dict_t *dict, char **op_errstr)
}
}
- ret = dict_get_int32n(dict, "count", SLEN("count"), &dict_count);
+ ret = dict_get_int32_sizen(dict, "count", &dict_count);
if (ret) {
gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
"Count(dict),not set in Volume-Set");
@@ -1069,12 +1075,12 @@ glusterd_op_stage_set_volume(dict_t *dict, char **op_errstr)
if (dict_count == 0) {
/*No options would be specified of volume set help */
- if (dict_getn(dict, "help", SLEN("help"))) {
+ if (dict_get_sizen(dict, "help")) {
ret = 0;
goto out;
}
- if (dict_getn(dict, "help-xml", SLEN("help-xml"))) {
+ if (dict_get_sizen(dict, "help-xml")) {
#if (HAVE_LIB_XML)
ret = 0;
goto out;
@@ -1083,8 +1089,7 @@ glusterd_op_stage_set_volume(dict_t *dict, char **op_errstr)
gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_MODULE_NOT_INSTALLED,
"libxml not present in the system");
*op_errstr = gf_strdup(
- "Error: xml libraries not "
- "present to produce xml-output");
+ "Error: xml libraries not present to produce xml-output");
goto out;
#endif
}
@@ -1095,25 +1100,17 @@ glusterd_op_stage_set_volume(dict_t *dict, char **op_errstr)
goto out;
}
- ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname);
+ ret = dict_get_str_sizen(dict, "volname", &volname);
if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
- "Unable to get volume name");
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Key=volname", NULL);
goto out;
}
if (strcasecmp(volname, "all") != 0) {
- exists = glusterd_check_volume_exists(volname);
- if (!exists) {
- snprintf(errstr, sizeof(errstr), FMTSTR_CHECK_VOL_EXISTS, volname);
- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOL_NOT_FOUND, "%s",
- errstr);
- ret = -1;
- goto out;
- }
-
ret = glusterd_volinfo_find(volname, &volinfo);
if (ret) {
+ snprintf(errstr, sizeof(errstr), FMTSTR_CHECK_VOL_EXISTS, volname);
gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOL_NOT_FOUND,
FMTSTR_CHECK_VOL_EXISTS, volname);
goto out;
@@ -1130,15 +1127,23 @@ glusterd_op_stage_set_volume(dict_t *dict, char **op_errstr)
all_vol = _gf_true;
}
+ val_dict = dict_new();
+ if (!val_dict) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL);
+ goto out;
+ }
+
for (count = 1; ret != 1; count++) {
- global_opt = _gf_false;
- keylen = sprintf(keystr, "key%d", count);
- ret = dict_get_strn(dict, keystr, keylen, &key);
- if (ret)
+ keystr_len = sprintf(keystr, "key%d", count);
+ ret = dict_get_strn(dict, keystr, keystr_len, &key);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
+ "Key=%s", keystr, NULL);
break;
+ }
- keylen = sprintf(keystr, "value%d", count);
- ret = dict_get_strn(dict, keystr, keylen, &value);
+ keystr_len = sprintf(keystr, "value%d", count);
+ ret = dict_get_strn(dict, keystr, keystr_len, &value);
if (ret) {
gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
"invalid key,value pair in 'volume set'");
@@ -1146,13 +1151,15 @@ glusterd_op_stage_set_volume(dict_t *dict, char **op_errstr)
goto out;
}
- if (strcmp(key, "config.memory-accounting") == 0) {
+ key_matched = _gf_false;
+ keylen = strlen(key);
+ if (len_strcmp(key, keylen, "config.memory-accounting")) {
+ key_matched = _gf_true;
gf_msg_debug(this->name, 0,
"enabling memory accounting for volume %s", volname);
ret = 0;
- }
-
- if (strcmp(key, "config.transport") == 0) {
+ } else if (len_strcmp(key, keylen, "config.transport")) {
+ key_matched = _gf_true;
gf_msg_debug(this->name, 0, "changing transport-type for volume %s",
volname);
ret = 0;
@@ -1162,23 +1169,31 @@ glusterd_op_stage_set_volume(dict_t *dict, char **op_errstr)
(strcasecmp(value, "tcp,rdma") == 0) ||
(strcasecmp(value, "rdma,tcp") == 0))) {
ret = snprintf(errstr, sizeof(errstr),
- "transport-type %s does "
- "not exist",
- value);
+ "transport-type %s does not exist", value);
/* lets not bother about above return value,
its a failure anyways */
ret = -1;
goto out;
}
+ } else if (len_strcmp(key, keylen, "ganesha.enable")) {
+ key_matched = _gf_true;
+ if (!strcmp(value, "off") == 0) {
+ ret = ganesha_manage_export(dict, "off", _gf_true, op_errstr);
+ if (ret)
+ goto out;
+ }
}
- ret = glusterd_check_bitrot_cmd(key, value, errstr, sizeof(errstr));
- if (ret)
- goto out;
-
- ret = glusterd_check_quota_cmd(key, value, errstr, sizeof(errstr));
- if (ret)
- goto out;
+ if (!key_matched) {
+ ret = glusterd_check_bitrot_cmd(key, keylen, errstr,
+ sizeof(errstr));
+ if (ret)
+ goto out;
+ ret = glusterd_check_quota_cmd(key, keylen, value, errstr,
+ sizeof(errstr));
+ if (ret)
+ goto out;
+ }
if (is_key_glusterd_hooks_friendly(key))
continue;
@@ -1205,42 +1220,36 @@ glusterd_op_stage_set_volume(dict_t *dict, char **op_errstr)
goto out;
}
- if (key_fixed)
+ if (key_fixed) {
key = key_fixed;
+ keylen = strlen(key_fixed);
+ }
- if (strcmp(key, "cluster.granular-entry-heal") == 0) {
+ if (len_strcmp(key, keylen, "cluster.granular-entry-heal")) {
/* For granular entry-heal, if the set command was
* invoked through volume-set CLI, then allow the
* command only if the volume is still in 'Created'
* state
*/
- if ((dict_getn(dict, "is-special-key", SLEN("is-special-key")) ==
- NULL) &&
- volinfo && (volinfo->status != GLUSTERD_STATUS_NONE)) {
+ if (volinfo && volinfo->status != GLUSTERD_STATUS_NONE &&
+ (dict_get_sizen(dict, "is-special-key") == NULL)) {
snprintf(errstr, sizeof(errstr),
- " 'gluster "
- "volume set <VOLNAME> %s {enable, "
- "disable}' is not supported. Use "
- "'gluster volume heal <VOLNAME> "
- "granular-entry-heal {enable, "
- "disable}' instead.",
+ " 'gluster volume set <VOLNAME> %s {enable, disable}'"
+ " is not supported."
+ " Use 'gluster volume heal <VOLNAME> "
+ "granular-entry-heal {enable, disable}' instead.",
key);
ret = -1;
goto out;
}
- }
-
- /* Check if the key is cluster.op-version and set
- * local_new_op_version to the value given if possible.
- */
- if (strcmp(key, GLUSTERD_GLOBAL_OP_VERSION_KEY) == 0) {
+ } else if (len_strcmp(key, keylen, GLUSTERD_GLOBAL_OP_VERSION_KEY)) {
+ /* Check if the key is cluster.op-version and set
+ * local_new_op_version to the value given if possible.
+ */
if (!all_vol) {
ret = -1;
snprintf(errstr, sizeof(errstr),
- "Option \""
- "%s\" is not valid for a single "
- "volume",
- key);
+ "Option \"%s\" is not valid for a single volume", key);
goto out;
}
/* Check if cluster.op-version is the only option being
@@ -1249,9 +1258,7 @@ glusterd_op_stage_set_volume(dict_t *dict, char **op_errstr)
if (count != 1) {
ret = -1;
snprintf(errstr, sizeof(errstr),
- "Option \""
- "%s\" cannot be set along with other "
- "options",
+ "Option \"%s\" cannot be set along with other options",
key);
goto out;
}
@@ -1261,10 +1268,8 @@ glusterd_op_stage_set_volume(dict_t *dict, char **op_errstr)
ret = gf_string2uint(value, &local_key_op_version);
if (ret) {
snprintf(errstr, sizeof(errstr),
- "invalid "
- "number format \"%s\" in option "
- "\"%s\"",
- value, key);
+ "invalid number format \"%s\" in option \"%s\"", value,
+ key);
gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_INVALID_ENTRY, "%s",
errstr);
goto out;
@@ -1274,9 +1279,8 @@ glusterd_op_stage_set_volume(dict_t *dict, char **op_errstr)
local_key_op_version < GD_OP_VERSION_MIN) {
ret = -1;
snprintf(errstr, sizeof(errstr),
- "Required op_version (%d) is not "
- "supported. Max supported op version "
- "is %d",
+ "Required op_version (%d) is not supported."
+ " Max supported op version is %d",
local_key_op_version, priv->op_version);
gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VERSION_UNSUPPORTED,
"%s", errstr);
@@ -1308,10 +1312,11 @@ glusterd_op_stage_set_volume(dict_t *dict, char **op_errstr)
if (ret)
goto out;
- local_key_op_version = glusterd_get_op_version_for_key(key);
+ vmep = gd_get_vmep(key);
+ local_key_op_version = glusterd_get_op_version_from_vmep(vmep);
if (local_key_op_version > local_new_op_version)
local_new_op_version = local_key_op_version;
- if (gd_is_client_option(key) &&
+ if (gd_is_client_option(vmep) &&
(local_key_op_version > local_new_client_op_version))
local_new_client_op_version = local_key_op_version;
@@ -1327,8 +1332,7 @@ glusterd_op_stage_set_volume(dict_t *dict, char **op_errstr)
ret = dict_get_uint32(dict, keystr, &key_op_version);
if (ret) {
gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
- "Failed to get key-op-version from"
- " dict");
+ "Failed to get key-op-version from dict");
goto out;
}
if (local_key_op_version != key_op_version) {
@@ -1337,60 +1341,63 @@ glusterd_op_stage_set_volume(dict_t *dict, char **op_errstr)
"option: %s op-version mismatch", key);
gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_OP_VERSION_MISMATCH,
"%s, required op-version = %" PRIu32
- ", "
- "available op-version = %" PRIu32,
+ ", available op-version = %" PRIu32,
errstr, key_op_version, local_key_op_version);
goto out;
}
}
- if (glusterd_check_globaloption(key))
- global_opt = _gf_true;
+ global_opt = glusterd_check_globaloption(key);
- ret = glusterd_validate_shared_storage(key, value, errstr);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0,
- GD_MSG_SHARED_STRG_VOL_OPT_VALIDATE_FAIL,
- "Failed to validate shared "
- "storage volume options");
- goto out;
- }
-
- ret = glusterd_validate_localtime_logging(key, value, errstr);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0,
- GD_MSG_LOCALTIME_LOGGING_VOL_OPT_VALIDATE_FAIL,
- "Failed to validate localtime "
- "logging volume options");
- goto out;
- }
-
- ret = glusterd_validate_daemon_log_level(key, value, errstr);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0,
- GD_MSG_DAEMON_LOG_LEVEL_VOL_OPT_VALIDATE_FAIL,
- "Failed to validate daemon-log-level volume "
- "options");
- goto out;
- }
-
- if (volinfo) {
- ret = glusterd_volinfo_get(volinfo, VKEY_FEATURES_TRASH, &val_dup);
- if (val_dup) {
- ret = gf_string2boolean(val_dup, &trash_enabled);
- if (ret)
- goto out;
+ if (len_strcmp(key, keylen, GLUSTERD_SHARED_STORAGE_KEY)) {
+ ret = glusterd_validate_shared_storage(value, errstr);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0,
+ GD_MSG_SHARED_STRG_VOL_OPT_VALIDATE_FAIL,
+ "Failed to validate shared storage volume options");
+ goto out;
+ }
+ } else if (len_strcmp(key, keylen, GLUSTERD_LOCALTIME_LOGGING_KEY)) {
+ ret = glusterd_validate_localtime_logging(value, errstr);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0,
+ GD_MSG_LOCALTIME_LOGGING_VOL_OPT_VALIDATE_FAIL,
+ "Failed to validate localtime logging volume options");
+ goto out;
+ }
+ } else if (len_strcmp(key, keylen, GLUSTERD_DAEMON_LOG_LEVEL_KEY)) {
+ ret = glusterd_validate_daemon_log_level(value, errstr);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0,
+ GD_MSG_DAEMON_LOG_LEVEL_VOL_OPT_VALIDATE_FAIL,
+ "Failed to validate daemon-log-level volume options");
+ goto out;
+ }
+ } else if (len_strcmp(key, keylen, "features.trash-dir")) {
+ if (volinfo) {
+ ret = glusterd_volinfo_get(volinfo, VKEY_FEATURES_TRASH,
+ &val_dup);
+ if (!ret && val_dup) {
+ ret = gf_string2boolean(val_dup, &trash_enabled);
+ if (ret)
+ goto out;
+ }
+ }
+ if (!trash_enabled) {
+ snprintf(errstr, sizeof(errstr),
+ "Trash translator is not enabled. "
+ "Use volume set %s trash on",
+ volname);
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOL_SET_FAIL,
+ "Unable to set the options in 'volume set': %s", errstr);
+ ret = -1;
+ goto out;
}
- }
-
- if (!strcmp(key, "features.trash-dir") && trash_enabled) {
if (strchr(value, '/')) {
snprintf(errstr, sizeof(errstr),
"Path is not allowed as option");
gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOL_SET_FAIL,
- "Unable to set the options in 'volume "
- "set': %s",
- errstr);
+ "Unable to set the options in 'volume set': %s", errstr);
ret = -1;
goto out;
}
@@ -1411,16 +1418,13 @@ glusterd_op_stage_set_volume(dict_t *dict, char **op_errstr)
snprintf(errstr, sizeof(errstr), "Path %s exists",
value);
gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOL_SET_FAIL,
- "Unable to set the "
- "options in "
- "'volume set': %s",
+ "Unable to set the options in 'volume set': %s",
errstr);
ret = -1;
goto out;
} else {
gf_msg_debug(this->name, 0,
- "Directory with given "
- "name does not exists,"
+ "Directory with given name does not exist,"
" continuing");
}
@@ -1431,9 +1435,7 @@ glusterd_op_stage_set_volume(dict_t *dict, char **op_errstr)
snprintf(errstr, sizeof(errstr),
"One or more bricks are down");
gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOL_SET_FAIL,
- "Unable to set the "
- "options in "
- "'volume set': %s",
+ "Unable to set the options in 'volume set': %s",
errstr);
ret = -1;
goto out;
@@ -1442,22 +1444,11 @@ glusterd_op_stage_set_volume(dict_t *dict, char **op_errstr)
if (trash_path) {
GF_FREE(trash_path);
trash_path = NULL;
- trash_path_len = 0;
}
}
- } else if (!strcmp(key, "features.trash-dir") && !trash_enabled) {
- snprintf(errstr, sizeof(errstr),
- "Trash translator is not enabled. Use "
- "volume set %s trash on",
- volname);
- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOL_SET_FAIL,
- "Unable to set the options in 'volume "
- "set': %s",
- errstr);
- ret = -1;
- goto out;
}
- ret = dict_set_str(val_dict, key, value);
+
+ ret = dict_set_strn(val_dict, key, keylen, value);
if (ret) {
gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
@@ -1482,12 +1473,11 @@ glusterd_op_stage_set_volume(dict_t *dict, char **op_errstr)
if (ret) {
gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLFILE_CREATE_FAIL,
- "Could not create "
- "temp volfile, some option failed: %s",
+ "Could not create temp volfile, some option failed: %s",
*op_errstr);
goto out;
}
- dict_del(val_dict, key);
+ dict_deln(val_dict, key, keylen);
if (key_fixed) {
GF_FREE(key_fixed);
@@ -1501,7 +1491,6 @@ glusterd_op_stage_set_volume(dict_t *dict, char **op_errstr)
volname, local_new_client_op_version, op_errstr);
if (ret)
goto out;
-
cont:
if (origin_glusterd) {
ret = dict_set_uint32(dict, "new-op-version", local_new_op_version);
@@ -1516,8 +1505,7 @@ cont:
* TODO: Remove this and the other places this is referred once
* 3.3.x compatibility is not required
*/
- ret = dict_set_int32n(dict, "check-op-version",
- SLEN("check-op-version"), 1);
+ ret = dict_set_int32_sizen(dict, "check-op-version", 1);
if (ret) {
gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
"Failed to set check-op-version in dict");
@@ -1549,81 +1537,6 @@ out:
}
return ret;
}
-static int
-glusterd_water_limit_check(glusterd_volinfo_t *volinfo, gf_boolean_t is_hi,
- char **op_errstr)
-{
- int ret = -1;
- char *default_value = NULL;
- char *temp = NULL;
- uint64_t wm = 0;
- uint64_t default_wm = 0;
- struct volopt_map_entry *vmap = NULL;
- xlator_t *this = NULL;
- extern struct volopt_map_entry glusterd_volopt_map[];
- char msg[2048] = {0};
-
- this = THIS;
- GF_ASSERT(this);
-
- if (is_hi)
- ret = glusterd_volinfo_get(volinfo, "cluster.watermark-low", &temp);
- else
- ret = glusterd_volinfo_get(volinfo, "cluster.watermark-hi", &temp);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLINFO_GET_FAIL,
- "failed to get watermark");
- goto out;
- }
-
- gf_string2bytesize_uint64(temp, &wm);
-
- if (is_hi)
- for (vmap = glusterd_volopt_map; vmap->key; vmap++) {
- if (strcmp(vmap->key, "cluster.watermark-hi") == 0)
- default_value = vmap->value;
- }
- else
- for (vmap = glusterd_volopt_map; vmap->key; vmap++) {
- if (strcmp(vmap->key, "cluster.watermark-low") == 0)
- default_value = vmap->value;
- }
-
- gf_string2bytesize_uint64(default_value, &default_wm);
-
- if (is_hi) {
- if (default_wm <= wm) {
- snprintf(msg, sizeof(msg),
- "Resetting hi-watermark "
- "to default will make it lower or equal to "
- "the low-watermark, which is an invalid "
- "configuration state. Please lower the "
- "low-watermark first to the desired value "
- "and then reset the hi-watermark.");
- ret = -1;
- goto out;
- }
- } else {
- if (default_wm >= wm) {
- snprintf(msg, sizeof(msg),
- "Resetting low-watermark "
- "to default will make it higher or equal to "
- "the hi-watermark, which is an invalid "
- "configuration state. Please raise the "
- "hi-watermark first to the desired value "
- "and then reset the low-watermark.");
- ret = -1;
- goto out;
- }
- }
-out:
- if (msg[0] != '\0') {
- gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_TIER_WATERMARK_RESET_FAIL,
- "%s", msg);
- *op_errstr = gf_strdup(msg);
- }
- return ret;
-}
static int
glusterd_op_stage_reset_volume(dict_t *dict, char **op_errstr)
@@ -1653,12 +1566,6 @@ glusterd_op_stage_reset_volume(dict_t *dict, char **op_errstr)
}
if (strcasecmp(volname, "all") != 0) {
- exists = glusterd_check_volume_exists(volname);
- if (!exists) {
- snprintf(msg, sizeof(msg), FMTSTR_CHECK_VOL_EXISTS, volname);
- ret = -1;
- goto out;
- }
ret = glusterd_volinfo_find(volname, &volinfo);
if (ret) {
snprintf(msg, sizeof(msg), FMTSTR_CHECK_VOL_EXISTS, volname);
@@ -1677,18 +1584,26 @@ glusterd_op_stage_reset_volume(dict_t *dict, char **op_errstr)
goto out;
}
+ /* *
+ * If key ganesha.enable is set, then volume should be unexported from
+ * ganesha server. Also it is a volume-level option, perform only when
+ * volume name not equal to "all"(in other words if volinfo != NULL)
+ */
+ if (volinfo && (!strcmp(key, "all") || !strcmp(key, "ganesha.enable"))) {
+ if (glusterd_check_ganesha_export(volinfo)) {
+ ret = ganesha_manage_export(dict, "off", _gf_true, op_errstr);
+ if (ret)
+ gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_NFS_GNS_RESET_FAIL,
+ "Could not reset ganesha.enable key");
+ }
+ }
+
if (strcmp(key, "all")) {
exists = glusterd_check_option_exists(key, &key_fixed);
if (exists == -1) {
ret = -1;
goto out;
- } else if (strcmp(key, "cluster.watermark-low") == 0) {
- ret = glusterd_water_limit_check(volinfo, _gf_false, op_errstr);
- } else if (strcmp(key, "cluster.watermark-hi") == 0) {
- ret = glusterd_water_limit_check(volinfo, _gf_true, op_errstr);
}
- if (ret)
- goto out;
if (!exists) {
ret = snprintf(msg, sizeof(msg), "Option %s does not exist", key);
@@ -1747,18 +1662,22 @@ glusterd_op_stage_sync_volume(dict_t *dict, char **op_errstr)
int ret = -1;
char *volname = NULL;
char *hostname = NULL;
- gf_boolean_t exists = _gf_false;
glusterd_peerinfo_t *peerinfo = NULL;
char msg[2048] = {
0,
};
glusterd_volinfo_t *volinfo = NULL;
+ xlator_t *this = NULL;
+ this = THIS;
+ GF_ASSERT(this);
ret = dict_get_strn(dict, "hostname", SLEN("hostname"), &hostname);
if (ret) {
snprintf(msg, sizeof(msg),
"hostname couldn't be "
"retrieved from msg");
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
+ "Key=hostname", NULL);
*op_errstr = gf_strdup(msg);
goto out;
}
@@ -1767,42 +1686,45 @@ glusterd_op_stage_sync_volume(dict_t *dict, char **op_errstr)
// volname is not present in case of sync all
ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname);
if (!ret) {
- exists = glusterd_check_volume_exists(volname);
- if (!exists) {
+ ret = glusterd_volinfo_find(volname, &volinfo);
+ if (ret) {
snprintf(msg, sizeof(msg),
"Volume %s "
"does not exist",
volname);
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_VOL_NOT_FOUND,
+ "Volume=%s", volname, NULL);
*op_errstr = gf_strdup(msg);
- ret = -1;
goto out;
}
- ret = glusterd_volinfo_find(volname, &volinfo);
- if (ret)
- goto out;
-
- } else {
- ret = 0;
}
} else {
- rcu_read_lock();
+ RCU_READ_LOCK;
peerinfo = glusterd_peerinfo_find(NULL, hostname);
if (peerinfo == NULL) {
+ RCU_READ_UNLOCK;
ret = -1;
snprintf(msg, sizeof(msg), "%s, is not a friend", hostname);
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_PEER_NOT_FOUND,
+ "Peer_name=%s", hostname, NULL);
*op_errstr = gf_strdup(msg);
+ goto out;
} else if (!peerinfo->connected) {
+ RCU_READ_UNLOCK;
+ ret = -1;
snprintf(msg, sizeof(msg),
"%s, is not connected at "
"the moment",
hostname);
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_PEER_DISCONNECTED,
+ "Peer_name=%s", hostname, NULL);
*op_errstr = gf_strdup(msg);
- ret = -1;
+ goto out;
}
- rcu_read_unlock();
+ RCU_READ_UNLOCK;
}
out:
@@ -1826,7 +1748,9 @@ glusterd_op_stage_status_volume(dict_t *dict, char **op_errstr)
glusterd_brickinfo_t *brickinfo = NULL;
glusterd_volinfo_t *volinfo = NULL;
dict_t *vol_opts = NULL;
+#ifdef BUILD_GNFS
gf_boolean_t nfs_disabled = _gf_false;
+#endif
gf_boolean_t shd_enabled = _gf_false;
GF_ASSERT(dict);
@@ -1836,8 +1760,11 @@ glusterd_op_stage_status_volume(dict_t *dict, char **op_errstr)
GF_ASSERT(priv);
ret = dict_get_uint32(dict, "cmd", &cmd);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
+ "Key=cmd", NULL);
goto out;
+ }
if (cmd & GF_CLI_STATUS_ALL)
goto out;
@@ -1848,17 +1775,8 @@ glusterd_op_stage_status_volume(dict_t *dict, char **op_errstr)
"The cluster is operating at "
"version 1. Getting the status of quotad is not "
"allowed in this state.");
- ret = -1;
- goto out;
- }
-
- if ((cmd & GF_CLI_STATUS_TIERD) &&
- (priv->op_version < GD_OP_VERSION_3_10_0)) {
- snprintf(msg, sizeof(msg),
- "The cluster is operating at "
- "version less than %d. Getting the "
- "status of tierd is not allowed in this state.",
- GD_OP_VERSION_3_10_0);
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_QUOTA_GET_STAT_FAIL,
+ msg, NULL);
ret = -1;
goto out;
}
@@ -1870,6 +1788,8 @@ glusterd_op_stage_status_volume(dict_t *dict, char **op_errstr)
"version less than %d. Getting the "
"status of snapd is not allowed in this state.",
GD_OP_VERSION_3_6_0);
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_SNAP_STATUS_FAIL, msg,
+ NULL);
ret = -1;
goto out;
}
@@ -1884,47 +1804,61 @@ glusterd_op_stage_status_volume(dict_t *dict, char **op_errstr)
ret = glusterd_volinfo_find(volname, &volinfo);
if (ret) {
snprintf(msg, sizeof(msg), FMTSTR_CHECK_VOL_EXISTS, volname);
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_VOLINFO_GET_FAIL,
+ "Volume=%s", volname, NULL);
ret = -1;
goto out;
}
ret = glusterd_validate_volume_id(dict, volinfo);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_VALIDATE_FAILED, NULL);
goto out;
+ }
ret = glusterd_is_volume_started(volinfo);
if (!ret) {
snprintf(msg, sizeof(msg), "Volume %s is not started", volname);
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_VOL_NOT_STARTED,
+ "Volume=%s", volname, NULL);
ret = -1;
goto out;
}
vol_opts = volinfo->dict;
- if ((cmd & GF_CLI_STATUS_NFS) != 0) {
- nfs_disabled = dict_get_str_boolean(vol_opts, NFS_DISABLE_MAP_KEY,
- _gf_false);
- if (nfs_disabled) {
- ret = -1;
- snprintf(msg, sizeof(msg), "NFS server is disabled for volume %s",
- volname);
- goto out;
- }
- } else if ((cmd & GF_CLI_STATUS_SHD) != 0) {
+ if ((cmd & GF_CLI_STATUS_SHD) != 0) {
if (glusterd_is_shd_compatible_volume(volinfo)) {
shd_enabled = gd_is_self_heal_enabled(volinfo, vol_opts);
} else {
ret = -1;
snprintf(msg, sizeof(msg), "Volume %s is not Self-heal compatible",
volname);
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_VOL_SHD_NOT_COMP,
+ "Volume=%s", volname, NULL);
goto out;
}
if (!shd_enabled) {
ret = -1;
snprintf(msg, sizeof(msg),
"Self-heal Daemon is disabled for volume %s", volname);
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_SELF_HEALD_DISABLED,
+ "Volume=%s", volname, NULL);
goto out;
}
+#ifdef BUILD_GNFS
+ } else if ((cmd & GF_CLI_STATUS_NFS) != 0) {
+ nfs_disabled = dict_get_str_boolean(vol_opts, NFS_DISABLE_MAP_KEY,
+ _gf_false);
+ if (nfs_disabled) {
+ ret = -1;
+ snprintf(msg, sizeof(msg), "NFS server is disabled for volume %s",
+ volname);
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ GD_MSG_NFS_GANESHA_DISABLED, "Volume=%s", volname, NULL);
+ goto out;
+ }
+#endif
} else if ((cmd & GF_CLI_STATUS_QUOTAD) != 0) {
if (!glusterd_is_volume_quota_enabled(volinfo)) {
ret = -1;
@@ -1932,6 +1866,8 @@ glusterd_op_stage_status_volume(dict_t *dict, char **op_errstr)
"Volume %s does not have "
"quota enabled",
volname);
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_QUOTA_DISABLED,
+ "Volume=%s", volname, NULL);
goto out;
}
} else if ((cmd & GF_CLI_STATUS_BITD) != 0) {
@@ -1941,15 +1877,8 @@ glusterd_op_stage_status_volume(dict_t *dict, char **op_errstr)
"Volume %s does not have "
"bitrot enabled",
volname);
- goto out;
- }
- } else if ((cmd & GF_CLI_STATUS_TIERD) != 0) {
- if (!glusterd_is_tierd_enabled(volinfo)) {
- ret = -1;
- snprintf(msg, sizeof(msg),
- "Volume %s does not have "
- "tierd enabled.",
- volname);
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_BITROT_NOT_ENABLED,
+ "Volume=%s", volname, NULL);
goto out;
}
} else if ((cmd & GF_CLI_STATUS_SCRUB) != 0) {
@@ -1960,6 +1889,10 @@ glusterd_op_stage_status_volume(dict_t *dict, char **op_errstr)
"bitrot enabled. Scrubber will be enabled "
"automatically if bitrot is enabled",
volname);
+ gf_smsg(
+ this->name, GF_LOG_ERROR, errno, GD_MSG_BITROT_NOT_ENABLED,
+ "Scrubber will be enabled automatically if bitrot is enabled",
+ "Volume=%s", volname, NULL);
goto out;
}
} else if ((cmd & GF_CLI_STATUS_SNAPD) != 0) {
@@ -1969,12 +1902,17 @@ glusterd_op_stage_status_volume(dict_t *dict, char **op_errstr)
"Volume %s does not have "
"uss enabled",
volname);
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_SNAPD_NOT_RUNNING,
+ "Volume=%s", volname, NULL);
goto out;
}
} else if ((cmd & GF_CLI_STATUS_BRICK) != 0) {
ret = dict_get_strn(dict, "brick", SLEN("brick"), &brick);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Key=brick", NULL);
goto out;
+ }
ret = glusterd_volume_brickinfo_get_by_brick(brick, volinfo, &brickinfo,
_gf_false);
@@ -1983,6 +1921,8 @@ glusterd_op_stage_status_volume(dict_t *dict, char **op_errstr)
"No brick %s in"
" volume %s",
brick, volname);
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_BRICK_NOT_FOUND,
+ "Brick=%s, Volume=%s", brick, volname, NULL);
ret = -1;
goto out;
}
@@ -2002,12 +1942,11 @@ out:
return ret;
}
-static int
+int
glusterd_op_stage_stats_volume(dict_t *dict, char **op_errstr)
{
int ret = -1;
char *volname = NULL;
- gf_boolean_t exists = _gf_false;
char msg[2048] = {
0,
};
@@ -2020,14 +1959,12 @@ glusterd_op_stage_stats_volume(dict_t *dict, char **op_errstr)
goto out;
}
- exists = glusterd_check_volume_exists(volname);
ret = glusterd_volinfo_find(volname, &volinfo);
- if ((!exists) || (ret < 0)) {
+ if (ret) {
snprintf(msg, sizeof(msg),
"Volume %s, "
"doesn't exist",
volname);
- ret = -1;
goto out;
}
@@ -2050,8 +1987,8 @@ glusterd_op_stage_stats_volume(dict_t *dict, char **op_errstr)
ret = -1;
goto out;
}
- }
- if ((GF_CLI_STATS_STOP == stats_op) || (GF_CLI_STATS_INFO == stats_op)) {
+ } else if ((GF_CLI_STATS_STOP == stats_op) ||
+ (GF_CLI_STATS_INFO == stats_op)) {
if (_gf_false == glusterd_is_profile_on(volinfo)) {
snprintf(msg, sizeof(msg),
"Profile on Volume %s is"
@@ -2191,17 +2128,16 @@ glusterd_options_reset(glusterd_volinfo_t *volinfo, char *key,
if (ret)
goto out;
}
- if (volinfo->type == GF_CLUSTER_TYPE_TIER) {
- svc = &(volinfo->tierd.svc);
- ret = svc->reconfigure(volinfo);
- if (ret)
- goto out;
- }
svc = &(volinfo->gfproxyd.svc);
ret = svc->reconfigure(volinfo);
if (ret)
goto out;
+ svc = &(volinfo->shd.svc);
+ ret = svc->reconfigure(volinfo);
+ if (ret)
+ goto out;
+
ret = glusterd_create_volfiles_and_notify_services(volinfo);
if (ret) {
gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLFILE_CREATE_FAIL,
@@ -2216,7 +2152,7 @@ glusterd_options_reset(glusterd_volinfo_t *volinfo, char *key,
goto out;
if (GLUSTERD_STATUS_STARTED == volinfo->status) {
- ret = glusterd_svcs_reconfigure();
+ ret = glusterd_svcs_reconfigure(volinfo);
if (ret)
goto out;
}
@@ -2273,8 +2209,10 @@ glusterd_op_reset_all_volume_options(xlator_t *this, dict_t *dict)
ret = -1;
dup_opt = dict_new();
- if (!dup_opt)
+ if (!dup_opt) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL);
goto out;
+ }
if (!all) {
dict_copy(conf->opts, dup_opt);
dict_del(dup_opt, key);
@@ -2285,8 +2223,11 @@ glusterd_op_reset_all_volume_options(xlator_t *this, dict_t *dict)
ret = dict_set_strn(dup_opt, GLUSTERD_GLOBAL_OPT_VERSION,
SLEN(GLUSTERD_GLOBAL_OPT_VERSION), next_version);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=%s", GLUSTERD_GLOBAL_OPT_VERSION, NULL);
goto out;
+ }
ret = glusterd_store_options(this, dup_opt);
if (ret)
@@ -2297,9 +2238,11 @@ glusterd_op_reset_all_volume_options(xlator_t *this, dict_t *dict)
ret = dict_set_dynstrn(conf->opts, GLUSTERD_GLOBAL_OPT_VERSION,
SLEN(GLUSTERD_GLOBAL_OPT_VERSION), next_version);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=%s", GLUSTERD_GLOBAL_OPT_VERSION, NULL);
goto out;
- else
+ } else
next_version = NULL;
if (!all) {
@@ -2393,6 +2336,16 @@ glusterd_op_reset_volume(dict_t *dict, char **op_rspstr)
}
}
+ if (!strcmp(key, "ganesha.enable") || !strcmp(key, "all")) {
+ if (glusterd_check_ganesha_export(volinfo) &&
+ is_origin_glusterd(dict)) {
+ ret = manage_export_config(volname, "off", op_rspstr);
+ if (ret)
+ gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_NFS_GNS_RESET_FAIL,
+ "Could not reset ganesha.enable key");
+ }
+ }
+
out:
GF_FREE(key_fixed);
if (quorum_action)
@@ -2435,6 +2388,7 @@ glusterd_start_bricks(glusterd_volinfo_t *volinfo)
if (!brickinfo->start_triggered) {
pthread_mutex_lock(&brickinfo->restart_mutex);
{
+ /* coverity[SLEEP] */
ret = glusterd_brick_start(volinfo, brickinfo, _gf_false,
_gf_false);
}
@@ -2572,8 +2526,11 @@ glusterd_op_set_all_volume_options(xlator_t *this, dict_t *dict,
conf = this->private;
ret = dict_get_strn(dict, "key1", SLEN("key1"), &key);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
+ "Key=key1", NULL);
goto out;
+ }
ret = dict_get_strn(dict, "value1", SLEN("value1"), &value);
if (ret) {
@@ -2648,18 +2605,16 @@ glusterd_op_set_all_volume_options(xlator_t *this, dict_t *dict,
goto out;
}
- if (volinfo->type == GF_CLUSTER_TYPE_TIER) {
- svc = &(volinfo->tierd.svc);
- ret = svc->reconfigure(volinfo);
- if (ret)
- goto out;
- }
-
svc = &(volinfo->gfproxyd.svc);
ret = svc->reconfigure(volinfo);
if (ret)
goto out;
+ svc = &(volinfo->shd.svc);
+ ret = svc->reconfigure(volinfo);
+ if (ret)
+ goto out;
+
ret = glusterd_create_volfiles_and_notify_services(volinfo);
if (ret) {
gf_msg(this->name, GF_LOG_ERROR, 0,
@@ -2673,7 +2628,7 @@ glusterd_op_set_all_volume_options(xlator_t *this, dict_t *dict,
}
}
if (svcs_reconfigure) {
- ret = glusterd_svcs_reconfigure();
+ ret = glusterd_svcs_reconfigure(NULL);
if (ret) {
gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SVC_RESTART_FAIL,
"Unable to restart "
@@ -2694,12 +2649,17 @@ glusterd_op_set_all_volume_options(xlator_t *this, dict_t *dict,
}
ret = -1;
dup_opt = dict_new();
- if (!dup_opt)
+ if (!dup_opt) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL);
goto out;
+ }
dict_copy(conf->opts, dup_opt);
ret = dict_set_str(dup_opt, key, value);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=%s", key, NULL);
goto out;
+ }
ret = glusterd_get_next_global_opt_version_str(conf->opts, &next_version);
if (ret)
@@ -2707,8 +2667,11 @@ glusterd_op_set_all_volume_options(xlator_t *this, dict_t *dict,
ret = dict_set_strn(dup_opt, GLUSTERD_GLOBAL_OPT_VERSION,
SLEN(GLUSTERD_GLOBAL_OPT_VERSION), next_version);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=%s", GLUSTERD_GLOBAL_OPT_VERSION, NULL);
goto out;
+ }
ret = glusterd_store_options(this, dup_opt);
if (ret)
@@ -2719,9 +2682,11 @@ glusterd_op_set_all_volume_options(xlator_t *this, dict_t *dict,
ret = dict_set_dynstrn(conf->opts, GLUSTERD_GLOBAL_OPT_VERSION,
SLEN(GLUSTERD_GLOBAL_OPT_VERSION), next_version);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=%s", GLUSTERD_GLOBAL_OPT_VERSION, NULL);
goto out;
- else
+ } else
next_version = NULL;
dup_value = gf_strdup(value);
@@ -2729,9 +2694,11 @@ glusterd_op_set_all_volume_options(xlator_t *this, dict_t *dict,
goto out;
ret = dict_set_dynstr(conf->opts, key, dup_value);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=%s", key, NULL);
goto out;
- else
+ } else
dup_value = NULL; /* Protect the allocation from GF_FREE */
out:
@@ -2810,7 +2777,7 @@ glusterd_set_shared_storage(dict_t *dict, char *key, char *value,
goto out;
}
- ret = mkdir_p(GLUSTER_SHARED_STORAGE_BRICK_DIR, 0777, _gf_true);
+ ret = mkdir_p(GLUSTER_SHARED_STORAGE_BRICK_DIR, 0755, _gf_true);
if (-1 == ret) {
snprintf(errstr, PATH_MAX,
"Failed to create shared "
@@ -2944,6 +2911,11 @@ glusterd_op_set_volume(dict_t *dict, char **errstr)
if (strcmp(key, "config.memory-accounting") == 0) {
ret = gf_string2boolean(value, &volinfo->memory_accounting);
+ if (ret == -1) {
+ gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_INVALID_ENTRY,
+ "Invalid value in key-value pair.");
+ goto out;
+ }
}
if (strcmp(key, "config.transport") == 0) {
@@ -2964,6 +2936,10 @@ glusterd_op_set_volume(dict_t *dict, char **errstr)
}
}
+ ret = glusterd_check_ganesha_cmd(key, value, errstr, dict);
+ if (ret == -1)
+ goto out;
+
if (!is_key_glusterd_hooks_friendly(key)) {
ret = glusterd_check_option_exists(key, &key_fixed);
GF_ASSERT(ret);
@@ -3043,17 +3019,16 @@ glusterd_op_set_volume(dict_t *dict, char **errstr)
if (ret)
goto out;
}
- if (volinfo->type == GF_CLUSTER_TYPE_TIER) {
- svc = &(volinfo->tierd.svc);
- ret = svc->reconfigure(volinfo);
- if (ret)
- goto out;
- }
svc = &(volinfo->gfproxyd.svc);
ret = svc->reconfigure(volinfo);
if (ret)
goto out;
+ svc = &(volinfo->shd.svc);
+ ret = svc->reconfigure(volinfo);
+ if (ret)
+ goto out;
+
ret = glusterd_create_volfiles_and_notify_services(volinfo);
if (ret) {
gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLFILE_CREATE_FAIL,
@@ -3069,7 +3044,7 @@ glusterd_op_set_volume(dict_t *dict, char **errstr)
goto out;
if (GLUSTERD_STATUS_STARTED == volinfo->status) {
- ret = glusterd_svcs_reconfigure();
+ ret = glusterd_svcs_reconfigure(volinfo);
if (ret) {
gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SVC_RESTART_FAIL,
"Unable to restart services");
@@ -3090,18 +3065,16 @@ glusterd_op_set_volume(dict_t *dict, char **errstr)
goto out;
}
- if (volinfo->type == GF_CLUSTER_TYPE_TIER) {
- svc = &(volinfo->tierd.svc);
- ret = svc->reconfigure(volinfo);
- if (ret)
- goto out;
- }
-
svc = &(volinfo->gfproxyd.svc);
ret = svc->reconfigure(volinfo);
if (ret)
goto out;
+ svc = &(volinfo->shd.svc);
+ ret = svc->reconfigure(volinfo);
+ if (ret)
+ goto out;
+
ret = glusterd_create_volfiles_and_notify_services(volinfo);
if (ret) {
gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLFILE_CREATE_FAIL,
@@ -3117,7 +3090,7 @@ glusterd_op_set_volume(dict_t *dict, char **errstr)
goto out;
if (GLUSTERD_STATUS_STARTED == volinfo->status) {
- ret = glusterd_svcs_reconfigure();
+ ret = glusterd_svcs_reconfigure(volinfo);
if (ret) {
gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SVC_RESTART_FAIL,
"Unable to restart services");
@@ -3160,6 +3133,8 @@ glusterd_op_sync_volume(dict_t *dict, char **op_errstr, dict_t *rsp_dict)
snprintf(msg, sizeof(msg),
"hostname couldn't be "
"retrieved from msg");
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
+ "Key=hostname", NULL);
*op_errstr = gf_strdup(msg);
goto out;
}
@@ -3184,6 +3159,7 @@ glusterd_op_sync_volume(dict_t *dict, char **op_errstr, dict_t *rsp_dict)
if (!rsp_dict) {
// this should happen only on source
+ gf_smsg(this->name, GF_LOG_INFO, errno, GD_MSG_INVALID_ARGUMENT, NULL);
ret = 0;
goto out;
}
@@ -3252,7 +3228,7 @@ glusterd_remove_profile_volume_options(glusterd_volinfo_t *volinfo)
dict_del_sizen(volinfo->dict, VKEY_DIAG_CNT_FOP_HITS);
}
-static int
+int
glusterd_op_stats_volume(dict_t *dict, char **op_errstr, dict_t *rsp_dict)
{
int ret = -1;
@@ -3324,7 +3300,7 @@ glusterd_op_stats_volume(dict_t *dict, char **op_errstr, dict_t *rsp_dict)
goto out;
if (GLUSTERD_STATUS_STARTED == volinfo->status) {
- ret = glusterd_svcs_reconfigure();
+ ret = glusterd_svcs_reconfigure(volinfo);
if (ret)
goto out;
}
@@ -3344,10 +3320,11 @@ _add_remove_bricks_to_dict(dict_t *dict, glusterd_volinfo_t *volinfo,
int ret = -1;
int count = 0;
int i = 0;
- char brick_key[1024] = {
+ char brick_key[16] = {
0,
};
- char dict_key[1024] = {
+ char dict_key[64] = {
+ /* dict_key is small as prefix is up to 32 chars */
0,
};
int keylen;
@@ -3412,7 +3389,7 @@ static int
_add_task_to_dict(dict_t *dict, glusterd_volinfo_t *volinfo, int op, int index)
{
int ret = -1;
- char key[64] = {
+ char key[32] = {
0,
};
int keylen;
@@ -3427,7 +3404,6 @@ _add_task_to_dict(dict_t *dict, glusterd_volinfo_t *volinfo, int op, int index)
GF_ASSERT(this);
switch (op) {
- case GD_OP_REMOVE_TIER_BRICK:
case GD_OP_REMOVE_BRICK:
snprintf(key, sizeof(key), "task%d", index);
ret = _add_remove_bricks_to_dict(dict, volinfo, key);
@@ -3437,7 +3413,6 @@ _add_task_to_dict(dict_t *dict, glusterd_volinfo_t *volinfo, int op, int index)
"Failed to add remove bricks to dict");
goto out;
}
- case GD_OP_TIER_MIGRATE:
case GD_OP_REBALANCE:
uuid_str = gf_strdup(uuid_utoa(volinfo->rebal.rebalance_id));
status = volinfo->rebal.defrag_status;
@@ -3492,25 +3467,12 @@ glusterd_aggregate_task_status(dict_t *rsp_dict, glusterd_volinfo_t *volinfo)
int ret = -1;
int tasks = 0;
xlator_t *this = NULL;
- glusterd_conf_t *conf = NULL;
this = THIS;
GF_ASSERT(this);
- conf = this->private;
if (!gf_uuid_is_null(volinfo->rebal.rebalance_id)) {
- if (volinfo->type == GF_CLUSTER_TYPE_TIER) {
- if (conf->op_version > GD_OP_VERSION_3_10_0)
- goto done;
- if (volinfo->rebal.op == GD_OP_REMOVE_BRICK)
- ret = _add_task_to_dict(rsp_dict, volinfo,
- GD_OP_REMOVE_TIER_BRICK, tasks);
- else if (volinfo->rebal.op == GD_OP_REBALANCE)
- ret = _add_task_to_dict(rsp_dict, volinfo, GD_OP_TIER_MIGRATE,
- tasks);
- } else
- ret = _add_task_to_dict(rsp_dict, volinfo, volinfo->rebal.op,
- tasks);
+ ret = _add_task_to_dict(rsp_dict, volinfo, volinfo->rebal.op, tasks);
if (ret) {
gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
@@ -3519,15 +3481,12 @@ glusterd_aggregate_task_status(dict_t *rsp_dict, glusterd_volinfo_t *volinfo)
}
tasks++;
}
-done:
ret = dict_set_int32n(rsp_dict, "tasks", SLEN("tasks"), tasks);
if (ret) {
gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
"Error setting tasks count in dict");
goto out;
}
- ret = 0;
-
out:
return ret;
}
@@ -3539,7 +3498,6 @@ glusterd_op_status_volume(dict_t *dict, char **op_errstr, dict_t *rsp_dict)
int node_count = 0;
int brick_index = -1;
int other_count = 0;
- int hot_brick_count = -1;
int other_index = 0;
uint32_t cmd = 0;
char *volname = NULL;
@@ -3549,9 +3507,12 @@ glusterd_op_status_volume(dict_t *dict, char **op_errstr, dict_t *rsp_dict)
glusterd_brickinfo_t *brickinfo = NULL;
glusterd_conf_t *priv = NULL;
dict_t *vol_opts = NULL;
+#ifdef BUILD_GNFS
gf_boolean_t nfs_disabled = _gf_false;
+#endif
gf_boolean_t shd_enabled = _gf_false;
gf_boolean_t origin_glusterd = _gf_false;
+ int snapd_enabled, bitrot_enabled, volume_quota_enabled;
this = THIS;
GF_ASSERT(this);
@@ -3599,29 +3560,22 @@ glusterd_op_status_volume(dict_t *dict, char **op_errstr, dict_t *rsp_dict)
}
vol_opts = volinfo->dict;
- if ((cmd & GF_CLI_STATUS_NFS) != 0) {
- ret = glusterd_add_node_to_dict(priv->nfs_svc.name, rsp_dict, 0,
- vol_opts);
- if (ret)
- goto out;
- other_count++;
- node_count++;
-
- } else if ((cmd & GF_CLI_STATUS_SHD) != 0) {
- ret = glusterd_add_node_to_dict(priv->shd_svc.name, rsp_dict, 0,
+ if ((cmd & GF_CLI_STATUS_QUOTAD) != 0) {
+ ret = glusterd_add_node_to_dict(priv->quotad_svc.name, rsp_dict, 0,
vol_opts);
if (ret)
goto out;
other_count++;
node_count++;
-
- } else if ((cmd & GF_CLI_STATUS_QUOTAD) != 0) {
- ret = glusterd_add_node_to_dict(priv->quotad_svc.name, rsp_dict, 0,
+#ifdef BUILD_GNFS
+ } else if ((cmd & GF_CLI_STATUS_NFS) != 0) {
+ ret = glusterd_add_node_to_dict(priv->nfs_svc.name, rsp_dict, 0,
vol_opts);
if (ret)
goto out;
other_count++;
node_count++;
+#endif
} else if ((cmd & GF_CLI_STATUS_BITD) != 0) {
ret = glusterd_add_node_to_dict(priv->bitd_svc.name, rsp_dict, 0,
vol_opts);
@@ -3636,14 +3590,14 @@ glusterd_op_status_volume(dict_t *dict, char **op_errstr, dict_t *rsp_dict)
goto out;
other_count++;
node_count++;
- } else if ((cmd & GF_CLI_STATUS_TIERD) != 0) {
- ret = glusterd_add_tierd_to_dict(volinfo, rsp_dict, other_index);
+ } else if ((cmd & GF_CLI_STATUS_SNAPD) != 0) {
+ ret = glusterd_add_snapd_to_dict(volinfo, rsp_dict, other_index);
if (ret)
goto out;
other_count++;
node_count++;
- } else if ((cmd & GF_CLI_STATUS_SNAPD) != 0) {
- ret = glusterd_add_snapd_to_dict(volinfo, rsp_dict, other_index);
+ } else if ((cmd & GF_CLI_STATUS_SHD) != 0) {
+ ret = glusterd_add_shd_to_dict(volinfo, rsp_dict, other_index);
if (ret)
goto out;
other_count++;
@@ -3672,6 +3626,15 @@ glusterd_op_status_volume(dict_t *dict, char **op_errstr, dict_t *rsp_dict)
goto out;
} else {
+ snapd_enabled = glusterd_is_snapd_enabled(volinfo);
+ shd_enabled = gd_is_self_heal_enabled(volinfo, vol_opts);
+#ifdef BUILD_GNFS
+ nfs_disabled = dict_get_str_boolean(vol_opts, NFS_DISABLE_MAP_KEY,
+ _gf_false);
+#endif
+ volume_quota_enabled = glusterd_is_volume_quota_enabled(volinfo);
+ bitrot_enabled = glusterd_is_bitrot_enabled(volinfo);
+
cds_list_for_each_entry(brickinfo, &volinfo->bricks, brick_list)
{
brick_index++;
@@ -3690,7 +3653,7 @@ glusterd_op_status_volume(dict_t *dict, char **op_errstr, dict_t *rsp_dict)
if ((cmd & GF_CLI_STATUS_MASK) == GF_CLI_STATUS_NONE) {
other_index = brick_index + 1;
- if (glusterd_is_snapd_enabled(volinfo)) {
+ if (snapd_enabled) {
ret = glusterd_add_snapd_to_dict(volinfo, rsp_dict,
other_index);
if (ret)
@@ -3700,18 +3663,18 @@ glusterd_op_status_volume(dict_t *dict, char **op_errstr, dict_t *rsp_dict)
node_count++;
}
- if (glusterd_is_tierd_enabled(volinfo)) {
- ret = glusterd_add_tierd_to_dict(volinfo, rsp_dict,
- other_index);
- if (ret)
- goto out;
- other_count++;
- other_index++;
- node_count++;
+ if (glusterd_is_shd_compatible_volume(volinfo)) {
+ if (shd_enabled) {
+ ret = glusterd_add_shd_to_dict(volinfo, rsp_dict,
+ other_index);
+ if (ret)
+ goto out;
+ other_count++;
+ other_index++;
+ node_count++;
+ }
}
-
- nfs_disabled = dict_get_str_boolean(vol_opts, NFS_DISABLE_MAP_KEY,
- _gf_false);
+#ifdef BUILD_GNFS
if (!nfs_disabled) {
ret = glusterd_add_node_to_dict(priv->nfs_svc.name, rsp_dict,
other_index, vol_opts);
@@ -3721,20 +3684,8 @@ glusterd_op_status_volume(dict_t *dict, char **op_errstr, dict_t *rsp_dict)
other_count++;
node_count++;
}
-
- if (glusterd_is_shd_compatible_volume(volinfo))
- shd_enabled = gd_is_self_heal_enabled(volinfo, vol_opts);
- if (shd_enabled) {
- ret = glusterd_add_node_to_dict(priv->shd_svc.name, rsp_dict,
- other_index, vol_opts);
- if (ret)
- goto out;
- other_count++;
- node_count++;
- other_index++;
- }
-
- if (glusterd_is_volume_quota_enabled(volinfo)) {
+#endif
+ if (volume_quota_enabled) {
ret = glusterd_add_node_to_dict(priv->quotad_svc.name, rsp_dict,
other_index, vol_opts);
if (ret)
@@ -3744,7 +3695,7 @@ glusterd_op_status_volume(dict_t *dict, char **op_errstr, dict_t *rsp_dict)
other_index++;
}
- if (glusterd_is_bitrot_enabled(volinfo)) {
+ if (bitrot_enabled) {
ret = glusterd_add_node_to_dict(priv->bitd_svc.name, rsp_dict,
other_index, vol_opts);
if (ret)
@@ -3752,11 +3703,8 @@ glusterd_op_status_volume(dict_t *dict, char **op_errstr, dict_t *rsp_dict)
other_count++;
node_count++;
other_index++;
- }
-
- /* For handling scrub status. Scrub daemon will be
- * running automatically when bitrot is enable*/
- if (glusterd_is_bitrot_enabled(volinfo)) {
+ /* For handling scrub status. Scrub daemon will be
+ * running automatically when bitrot is enable */
ret = glusterd_add_node_to_dict(priv->scrub_svc.name, rsp_dict,
other_index, vol_opts);
if (ret)
@@ -3767,35 +3715,31 @@ glusterd_op_status_volume(dict_t *dict, char **op_errstr, dict_t *rsp_dict)
}
}
- if (volinfo->type == GF_CLUSTER_TYPE_TIER)
- hot_brick_count = volinfo->tier_info.hot_brick_count;
- ret = dict_set_int32n(rsp_dict, "hot_brick_count", SLEN("hot_brick_count"),
- hot_brick_count);
- if (ret)
- goto out;
-
ret = dict_set_int32n(rsp_dict, "type", SLEN("type"), volinfo->type);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=type", NULL);
goto out;
+ }
ret = dict_set_int32n(rsp_dict, "brick-index-max", SLEN("brick-index-max"),
brick_index);
if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
- "Error setting brick-index-max to dict");
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Key=brick-index-max", NULL);
goto out;
}
ret = dict_set_int32n(rsp_dict, "other-count", SLEN("other-count"),
other_count);
if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
- "Error setting other-count to dict");
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Key=other-count", NULL);
goto out;
}
ret = dict_set_int32n(rsp_dict, "count", SLEN("count"), node_count);
if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
- "Error setting node count to dict");
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Key=count", NULL);
goto out;
}
@@ -3861,7 +3805,7 @@ glusterd_op_ac_send_lock(glusterd_op_sm_event_t *event, void *ctx)
priv = this->private;
GF_ASSERT(priv);
- rcu_read_lock();
+ RCU_READ_LOCK;
cds_list_for_each_entry_rcu(peerinfo, &priv->peers, uuid_list)
{
/* Only send requests to peers who were available before the
@@ -3882,7 +3826,7 @@ glusterd_op_ac_send_lock(glusterd_op_sm_event_t *event, void *ctx)
if (proc->fn) {
ret = proc->fn(NULL, this, peerinfo);
if (ret) {
- rcu_read_unlock();
+ RCU_READ_UNLOCK;
gf_msg(this->name, GF_LOG_WARNING, 0,
GD_MSG_LOCK_REQ_SEND_FAIL,
"Failed to send lock request "
@@ -3903,7 +3847,7 @@ glusterd_op_ac_send_lock(glusterd_op_sm_event_t *event, void *ctx)
if (proc->fn) {
ret = dict_set_static_ptr(dict, "peerinfo", peerinfo);
if (ret) {
- rcu_read_unlock();
+ RCU_READ_UNLOCK;
gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
"failed to set peerinfo");
dict_unref(dict);
@@ -3912,7 +3856,7 @@ glusterd_op_ac_send_lock(glusterd_op_sm_event_t *event, void *ctx)
ret = proc->fn(NULL, this, dict);
if (ret) {
- rcu_read_unlock();
+ RCU_READ_UNLOCK;
gf_msg(this->name, GF_LOG_WARNING, 0,
GD_MSG_MGMTV3_LOCK_REQ_SEND_FAIL,
"Failed to send mgmt_v3 lock "
@@ -3928,7 +3872,7 @@ glusterd_op_ac_send_lock(glusterd_op_sm_event_t *event, void *ctx)
}
}
}
- rcu_read_unlock();
+ RCU_READ_UNLOCK;
opinfo.pending_count = pending_count;
@@ -3964,7 +3908,7 @@ glusterd_op_ac_send_unlock(glusterd_op_sm_event_t *event, void *ctx)
priv = this->private;
GF_ASSERT(priv);
- rcu_read_lock();
+ RCU_READ_LOCK;
cds_list_for_each_entry_rcu(peerinfo, &priv->peers, uuid_list)
{
/* Only send requests to peers who were available before the
@@ -4036,7 +3980,7 @@ glusterd_op_ac_send_unlock(glusterd_op_sm_event_t *event, void *ctx)
}
}
}
- rcu_read_unlock();
+ RCU_READ_UNLOCK;
opinfo.pending_count = pending_count;
@@ -4272,8 +4216,10 @@ glusterd_dict_set_volid(dict_t *dict, char *volname, char **op_errstr)
this = THIS;
GF_ASSERT(this);
- if (!dict || !volname)
+ if (!dict || !volname) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_INVALID_ARGUMENT, NULL);
goto out;
+ }
ret = glusterd_volinfo_find(volname, &volinfo);
if (ret) {
@@ -4423,9 +4369,7 @@ glusterd_op_build_payload(dict_t **req, char **op_errstr, dict_t *op_ctx)
req_dict = dict_ref(dict);
} break;
- case GD_OP_REMOVE_BRICK:
- case GD_OP_DETACH_TIER_STATUS:
- case GD_OP_REMOVE_TIER_BRICK: {
+ case GD_OP_REMOVE_BRICK: {
dict_t *dict = ctx;
ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname);
if (ret) {
@@ -4477,8 +4421,6 @@ glusterd_op_build_payload(dict_t **req, char **op_errstr, dict_t *op_ctx)
case GD_OP_DEFRAG_BRICK_VOLUME:
case GD_OP_BARRIER:
case GD_OP_BITROT:
- case GD_OP_TIER_START_STOP:
- case GD_OP_TIER_STATUS:
case GD_OP_SCRUB_STATUS:
case GD_OP_SCRUB_ONDEMAND:
case GD_OP_RESET_BRICK: {
@@ -4494,7 +4436,8 @@ glusterd_op_build_payload(dict_t **req, char **op_errstr, dict_t *op_ctx)
case GD_OP_SYNC_VOLUME:
case GD_OP_COPY_FILE:
- case GD_OP_SYS_EXEC: {
+ case GD_OP_SYS_EXEC:
+ case GD_OP_GANESHA: {
dict_copy(dict, req_dict);
} break;
@@ -4589,7 +4532,7 @@ glusterd_op_ac_send_stage_op(glusterd_op_sm_event_t *event, void *ctx)
goto out;
}
- rcu_read_lock();
+ RCU_READ_LOCK;
cds_list_for_each_entry_rcu(peerinfo, &priv->peers, uuid_list)
{
/* Only send requests to peers who were available before the
@@ -4609,7 +4552,7 @@ glusterd_op_ac_send_stage_op(glusterd_op_sm_event_t *event, void *ctx)
if (proc->fn) {
ret = dict_set_static_ptr(dict, "peerinfo", peerinfo);
if (ret) {
- rcu_read_unlock();
+ RCU_READ_UNLOCK;
gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
"failed to "
"set peerinfo");
@@ -4629,7 +4572,7 @@ glusterd_op_ac_send_stage_op(glusterd_op_sm_event_t *event, void *ctx)
pending_count++;
}
}
- rcu_read_unlock();
+ RCU_READ_UNLOCK;
opinfo.pending_count = pending_count;
out:
@@ -4674,7 +4617,7 @@ glusterd_op_volume_dict_uuid_to_hostname(dict_t *dict, const char *key_fmt,
{
int ret = -1;
int i = 0;
- char key[1024];
+ char key[128];
int keylen;
char *uuid_str = NULL;
uuid_t uuid = {
@@ -5042,9 +4985,6 @@ glusterd_op_modify_op_ctx(glusterd_op_t op, void *ctx)
* same
*/
case GD_OP_DEFRAG_BRICK_VOLUME:
- case GD_OP_TIER_STATUS:
- case GD_OP_REMOVE_TIER_BRICK:
- case GD_OP_DETACH_TIER_STATUS:
case GD_OP_SCRUB_STATUS:
case GD_OP_SCRUB_ONDEMAND:
ret = dict_get_int32n(op_ctx, "count", SLEN("count"), &count);
@@ -5216,7 +5156,7 @@ glusterd_op_ac_send_commit_op(glusterd_op_sm_event_t *event, void *ctx)
goto out;
}
- rcu_read_lock();
+ RCU_READ_LOCK;
cds_list_for_each_entry_rcu(peerinfo, &priv->peers, uuid_list)
{
/* Only send requests to peers who were available before the
@@ -5236,7 +5176,7 @@ glusterd_op_ac_send_commit_op(glusterd_op_sm_event_t *event, void *ctx)
if (proc->fn) {
ret = dict_set_static_ptr(dict, "peerinfo", peerinfo);
if (ret) {
- rcu_read_unlock();
+ RCU_READ_UNLOCK;
gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
"failed to set peerinfo");
goto out;
@@ -5254,7 +5194,7 @@ glusterd_op_ac_send_commit_op(glusterd_op_sm_event_t *event, void *ctx)
pending_count++;
}
}
- rcu_read_unlock();
+ RCU_READ_UNLOCK;
opinfo.pending_count = pending_count;
gf_msg_debug(this->name, 0,
@@ -5652,9 +5592,17 @@ glusterd_op_ac_stage_op(glusterd_op_sm_event_t *event, void *ctx)
dict_t *dict = NULL;
xlator_t *this = NULL;
uuid_t *txn_id = NULL;
+ glusterd_op_info_t txn_op_info = {
+ {0},
+ };
+ glusterd_conf_t *priv = NULL;
this = THIS;
GF_ASSERT(this);
+
+ priv = this->private;
+ GF_ASSERT(priv);
+
GF_ASSERT(ctx);
req_ctx = ctx;
@@ -5686,12 +5634,14 @@ glusterd_op_ac_stage_op(glusterd_op_sm_event_t *event, void *ctx)
ret = -1;
goto out;
}
+ ret = glusterd_get_txn_opinfo(&event->txn_id, &txn_op_info);
ret = dict_set_bin(rsp_dict, "transaction_id", txn_id, sizeof(*txn_id));
if (ret) {
gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
"Failed to set transaction id.");
GF_FREE(txn_id);
+ txn_id = NULL;
goto out;
}
@@ -5704,6 +5654,16 @@ out:
gf_msg_debug(this->name, 0, "Returning with %d", ret);
+ /* for no volname transactions, the txn_opinfo needs to be cleaned up
+ * as there's no unlock event triggered. However if the originator node of
+ * this transaction is still running with a version lower than 60000,
+ * txn_opinfo can't be cleared as that'll lead to a race of referring op_ctx
+ * after it's being freed.
+ */
+ if (txn_op_info.skip_locking && priv->op_version >= GD_OP_VERSION_6_0 &&
+ txn_id)
+ ret = glusterd_clear_txn_opinfo(txn_id);
+
if (rsp_dict)
dict_unref(rsp_dict);
@@ -5720,8 +5680,6 @@ glusterd_need_brick_op(glusterd_op_t op)
switch (op) {
case GD_OP_PROFILE_VOLUME:
case GD_OP_STATUS_VOLUME:
- case GD_OP_TIER_STATUS:
- case GD_OP_DETACH_TIER_STATUS:
case GD_OP_DEFRAG_BRICK_VOLUME:
case GD_OP_HEAL_VOLUME:
case GD_OP_SCRUB_STATUS:
@@ -5934,6 +5892,10 @@ glusterd_op_stage_validate(glusterd_op_t op, dict_t *dict, char **op_errstr,
ret = glusterd_op_stage_set_volume(dict, op_errstr);
break;
+ case GD_OP_GANESHA:
+ ret = glusterd_op_stage_set_ganesha(dict, op_errstr);
+ break;
+
case GD_OP_RESET_VOLUME:
ret = glusterd_op_stage_reset_volume(dict, op_errstr);
break;
@@ -6015,13 +5977,8 @@ glusterd_op_stage_validate(glusterd_op_t op, dict_t *dict, char **op_errstr,
static void
glusterd_wait_for_blockers(glusterd_conf_t *priv)
{
- uint64_t blockers = GF_ATOMIC_GET(priv->blockers);
-
- while (blockers) {
- synclock_unlock(&priv->big_lock);
- sleep(1);
- blockers = GF_ATOMIC_GET(priv->blockers);
- synclock_lock(&priv->big_lock);
+ while (GF_ATOMIC_GET(priv->blockers)) {
+ synccond_wait(&priv->cond_blockers, &priv->big_lock);
}
}
@@ -6064,7 +6021,9 @@ glusterd_op_commit_perform(glusterd_op_t op, dict_t *dict, char **op_errstr,
case GD_OP_SET_VOLUME:
ret = glusterd_op_set_volume(dict, op_errstr);
break;
-
+ case GD_OP_GANESHA:
+ ret = glusterd_op_set_ganesha(dict, op_errstr);
+ break;
case GD_OP_RESET_VOLUME:
ret = glusterd_op_reset_volume(dict, op_errstr);
break;
@@ -6249,9 +6208,6 @@ glusterd_bricks_select_remove_brick(dict_t *dict, char **op_errstr,
goto out;
}
- if (command == GF_DEFRAG_CMD_DETACH_START)
- return glusterd_bricks_select_tier_volume(dict, op_errstr, selected);
-
ret = dict_get_int32n(dict, "force", SLEN("force"), &force);
if (ret) {
gf_msg(THIS->name, GF_LOG_INFO, 0, GD_MSG_DICT_GET_FAILED,
@@ -6355,6 +6311,7 @@ glusterd_bricks_select_profile_volume(dict_t *dict, char **op_errstr,
goto out;
break;
case GF_CLI_STATS_INFO:
+#ifdef BUILD_GNFS
ret = dict_get_str_boolean(dict, "nfs", _gf_false);
if (ret) {
if (!priv->nfs_svc.online) {
@@ -6379,6 +6336,7 @@ glusterd_bricks_select_profile_volume(dict_t *dict, char **op_errstr,
ret = 0;
goto out;
}
+#endif
cds_list_for_each_entry(brickinfo, &volinfo->bricks, brick_list)
{
if (glusterd_is_brick_started(brickinfo)) {
@@ -6410,6 +6368,7 @@ glusterd_bricks_select_profile_volume(dict_t *dict, char **op_errstr,
break;
case GF_CLI_STATS_TOP:
+#ifdef BUILD_GNFS
ret = dict_get_str_boolean(dict, "nfs", _gf_false);
if (ret) {
if (!priv->nfs_svc.online) {
@@ -6434,6 +6393,7 @@ glusterd_bricks_select_profile_volume(dict_t *dict, char **op_errstr,
ret = 0;
goto out;
}
+#endif
ret = dict_get_strn(dict, "brick", SLEN("brick"), &brick);
if (!ret) {
ret = glusterd_volume_brickinfo_get_by_brick(
@@ -6653,6 +6613,10 @@ _select_hxlators_for_full_self_heal(xlator_t *this, glusterd_volinfo_t *volinfo,
glusterd_brickinfo_t *brickinfo = NULL;
int hxl_children = 0;
uuid_t candidate = {0};
+ int brick_index = 0;
+ glusterd_peerinfo_t *peerinfo = NULL;
+ int delta = 0;
+ uuid_t candidate_max = {0};
if ((*index) == 0)
(*index)++;
@@ -6664,13 +6628,40 @@ _select_hxlators_for_full_self_heal(xlator_t *this, glusterd_volinfo_t *volinfo,
cds_list_for_each_entry(brickinfo, &volinfo->bricks, brick_list)
{
+ if (gf_uuid_compare(brickinfo->uuid, candidate_max) > 0) {
+ if (!gf_uuid_compare(MY_UUID, brickinfo->uuid)) {
+ gf_uuid_copy(candidate_max, brickinfo->uuid);
+ } else {
+ peerinfo = glusterd_peerinfo_find(brickinfo->uuid, NULL);
+ if (peerinfo && peerinfo->connected) {
+ gf_uuid_copy(candidate_max, brickinfo->uuid);
+ }
+ }
+ }
+ }
+
+ cds_list_for_each_entry(brickinfo, &volinfo->bricks, brick_list)
+ {
if (gf_uuid_is_null(brickinfo->uuid))
(void)glusterd_resolve_brick(brickinfo);
- if (gf_uuid_compare(brickinfo->uuid, candidate) > 0)
- gf_uuid_copy(candidate, brickinfo->uuid);
+ delta %= hxl_children;
+ if ((*index + delta) == (brick_index + hxl_children)) {
+ if (!gf_uuid_compare(MY_UUID, brickinfo->uuid)) {
+ gf_uuid_copy(candidate, brickinfo->uuid);
+ } else {
+ peerinfo = glusterd_peerinfo_find(brickinfo->uuid, NULL);
+ if (peerinfo && peerinfo->connected) {
+ gf_uuid_copy(candidate, brickinfo->uuid);
+ } else if (peerinfo &&
+ (!gf_uuid_compare(candidate_max, MY_UUID))) {
+ _add_hxlator_to_dict(dict, volinfo,
+ ((*index) - 1) / hxl_children,
+ (*hxlator_count));
+ (*hxlator_count)++;
+ }
+ }
- if ((*index) % hxl_children == 0) {
if (!gf_uuid_compare(MY_UUID, candidate)) {
_add_hxlator_to_dict(dict, volinfo,
((*index) - 1) / hxl_children,
@@ -6678,6 +6669,8 @@ _select_hxlators_for_full_self_heal(xlator_t *this, glusterd_volinfo_t *volinfo,
(*hxlator_count)++;
}
gf_uuid_clear(candidate);
+ brick_index += hxl_children;
+ delta++;
}
(*index)++;
@@ -6747,12 +6740,12 @@ fill_shd_status_for_local_bricks(dict_t *dict, glusterd_volinfo_t *volinfo,
dict_t *req_dict)
{
glusterd_brickinfo_t *brickinfo = NULL;
- char *msg = "self-heal-daemon is not running on";
- char key[1024] = {
+ static char *msg = "self-heal-daemon is not running on";
+ char key[32] = {
0,
};
int keylen;
- char value[1024] = {
+ char value[128] = {
0,
};
int ret = 0;
@@ -6821,16 +6814,18 @@ glusterd_shd_select_brick_xlator(dict_t *dict, gf_xl_afr_op_t heal_op,
int ret = -1;
glusterd_conf_t *priv = NULL;
xlator_t *this = NULL;
+ glusterd_svc_t *svc = NULL;
this = THIS;
GF_ASSERT(this);
priv = this->private;
GF_ASSERT(priv);
+ svc = &(volinfo->shd.svc);
switch (heal_op) {
case GF_SHD_OP_INDEX_SUMMARY:
case GF_SHD_OP_STATISTICS_HEAL_COUNT:
- if (!priv->shd_svc.online) {
+ if (!svc->online) {
if (!rsp_dict) {
gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_OPCTX_NULL,
"Received "
@@ -6851,7 +6846,7 @@ glusterd_shd_select_brick_xlator(dict_t *dict, gf_xl_afr_op_t heal_op,
break;
case GF_SHD_OP_STATISTICS_HEAL_COUNT_PER_REPLICA:
- if (!priv->shd_svc.online) {
+ if (!svc->online) {
if (!rsp_dict) {
gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_OPCTX_NULL,
"Received "
@@ -6902,7 +6897,6 @@ glusterd_bricks_select_heal_volume(dict_t *dict, char **op_errstr,
char *volname = NULL;
glusterd_conf_t *priv = NULL;
glusterd_volinfo_t *volinfo = NULL;
- glusterd_volinfo_t *dup_volinfo = NULL;
xlator_t *this = NULL;
char msg[2048] = {
0,
@@ -6940,31 +6934,10 @@ glusterd_bricks_select_heal_volume(dict_t *dict, char **op_errstr,
"heal op invalid");
goto out;
}
- if (volinfo->type == GF_CLUSTER_TYPE_TIER) {
- ret = glusterd_create_sub_tier_volinfo(volinfo, &dup_volinfo, _gf_false,
- volname);
- if (ret < 0)
- goto out;
-
- ret = glusterd_shd_select_brick_xlator(
- dict, heal_op, dup_volinfo, &index, &hxlator_count, rsp_dict);
- glusterd_volinfo_delete(dup_volinfo);
- if (ret < 0)
- goto out;
- ret = glusterd_create_sub_tier_volinfo(volinfo, &dup_volinfo, _gf_true,
- volname);
- if (ret < 0)
- goto out;
- ret = glusterd_shd_select_brick_xlator(
- dict, heal_op, dup_volinfo, &index, &hxlator_count, rsp_dict);
- glusterd_volinfo_delete(dup_volinfo);
- if (ret < 0)
- goto out;
- } else {
- ret = glusterd_shd_select_brick_xlator(dict, heal_op, volinfo, &index,
- &hxlator_count, rsp_dict);
- if (ret < 0)
- goto out;
+ ret = glusterd_shd_select_brick_xlator(dict, heal_op, volinfo, &index,
+ &hxlator_count, rsp_dict);
+ if (ret < 0) {
+ goto out;
}
if (!hxlator_count)
@@ -6986,7 +6959,7 @@ glusterd_bricks_select_heal_volume(dict_t *dict, char **op_errstr,
ret = -1;
goto out;
} else {
- pending_node->node = &(priv->shd_svc);
+ pending_node->node = &(volinfo->shd.svc);
pending_node->type = GD_NODE_SHD;
cds_list_add_tail(&pending_node->list, selected);
pending_node = NULL;
@@ -6997,69 +6970,7 @@ out:
return ret;
}
-int
-glusterd_bricks_select_tier_volume(dict_t *dict, char **op_errstr,
- struct cds_list_head *selected)
-{
- int ret = -1;
- char *volname = NULL;
- glusterd_volinfo_t *volinfo = NULL;
- xlator_t *this = NULL;
- char msg[2048] = {
- 0,
- };
- glusterd_pending_node_t *pending_node = NULL;
- glusterd_brickinfo_t *brick = NULL;
- gf_boolean_t retval = _gf_false;
-
- this = THIS;
- GF_VALIDATE_OR_GOTO(THIS->name, this, out);
-
- ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname);
- if (ret) {
- gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
- "volume name get failed");
- goto out;
- }
-
- ret = glusterd_volinfo_find(volname, &volinfo);
- if (ret) {
- snprintf(msg, sizeof(msg), "Volume %s does not exist", volname);
-
- *op_errstr = gf_strdup(msg);
- gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_VOL_NOT_FOUND, "%s", msg);
- goto out;
- }
- /*check if this node needs tierd*/
- cds_list_for_each_entry(brick, &volinfo->bricks, brick_list)
- {
- if (gf_uuid_compare(MY_UUID, brick->uuid) == 0) {
- retval = _gf_true;
- break;
- }
- }
-
- if (!retval)
- goto out;
-
- pending_node = GF_CALLOC(1, sizeof(*pending_node),
- gf_gld_mt_pending_node_t);
- if (!pending_node) {
- ret = -1;
- goto out;
- } else {
- pending_node->node = volinfo;
- pending_node->type = GD_NODE_TIERD;
- cds_list_add_tail(&pending_node->list, selected);
- pending_node = NULL;
- }
- ret = 0;
-
-out:
- return ret;
-}
-
-int
+static int
glusterd_bricks_select_rebalance_volume(dict_t *dict, char **op_errstr,
struct cds_list_head *selected)
{
@@ -7120,6 +7031,7 @@ glusterd_bricks_select_status_volume(dict_t *dict, char **op_errstr,
glusterd_pending_node_t *pending_node = NULL;
xlator_t *this = NULL;
glusterd_conf_t *priv = NULL;
+ glusterd_svc_t *svc = NULL;
GF_ASSERT(dict);
@@ -7148,7 +7060,6 @@ glusterd_bricks_select_status_volume(dict_t *dict, char **op_errstr,
case GF_CLI_STATUS_SHD:
case GF_CLI_STATUS_QUOTAD:
case GF_CLI_STATUS_SNAPD:
- case GF_CLI_STATUS_TIERD:
case GF_CLI_STATUS_BITD:
case GF_CLI_STATUS_SCRUB:
case GF_CLI_STATUS_CLIENT_LIST:
@@ -7195,6 +7106,7 @@ glusterd_bricks_select_status_volume(dict_t *dict, char **op_errstr,
cds_list_add_tail(&pending_node->list, selected);
ret = 0;
+#ifdef BUILD_GNFS
} else if ((cmd & GF_CLI_STATUS_NFS) != 0) {
if (!priv->nfs_svc.online) {
ret = -1;
@@ -7214,8 +7126,10 @@ glusterd_bricks_select_status_volume(dict_t *dict, char **op_errstr,
cds_list_add_tail(&pending_node->list, selected);
ret = 0;
+#endif
} else if ((cmd & GF_CLI_STATUS_SHD) != 0) {
- if (!priv->shd_svc.online) {
+ svc = &(volinfo->shd.svc);
+ if (!svc->online) {
ret = -1;
gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SELF_HEALD_DISABLED,
"Self-heal daemon is not running");
@@ -7227,7 +7141,7 @@ glusterd_bricks_select_status_volume(dict_t *dict, char **op_errstr,
ret = -1;
goto out;
}
- pending_node->node = &(priv->shd_svc);
+ pending_node->node = svc;
pending_node->type = GD_NODE_SHD;
pending_node->index = 0;
cds_list_add_tail(&pending_node->list, selected);
@@ -7293,30 +7207,6 @@ glusterd_bricks_select_status_volume(dict_t *dict, char **op_errstr,
cds_list_add_tail(&pending_node->list, selected);
ret = 0;
- } else if ((cmd & GF_CLI_STATUS_TIERD) != 0) {
- if (!volinfo->tierd.svc.online) {
- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_TIERD_NOT_RUNNING,
- "tierd is not "
- "running");
- ret = -1;
- goto out;
- }
- pending_node = GF_CALLOC(1, sizeof(*pending_node),
- gf_gld_mt_pending_node_t);
- if (!pending_node) {
- gf_msg(this->name, GF_LOG_ERROR, ENOMEM, GD_MSG_NO_MEMORY,
- "failed to allocate "
- "memory for pending node");
- ret = -1;
- goto out;
- }
-
- pending_node->node = (void *)(&volinfo->tierd);
- pending_node->type = GD_NODE_TIERD;
- pending_node->index = 0;
- cds_list_add_tail(&pending_node->list, selected);
-
- ret = 0;
} else if ((cmd & GF_CLI_STATUS_SNAPD) != 0) {
if (!volinfo->snapd.svc.online) {
gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAPD_NOT_RUNNING,
@@ -7492,6 +7382,7 @@ glusterd_op_ac_send_brick_op(glusterd_op_sm_event_t *event, void *ctx)
glusterd_op_t op = GD_OP_NONE;
glusterd_req_ctx_t *req_ctx = NULL;
char *op_errstr = NULL;
+ gf_boolean_t free_req_ctx = _gf_false;
this = THIS;
priv = this->private;
@@ -7500,6 +7391,9 @@ glusterd_op_ac_send_brick_op(glusterd_op_sm_event_t *event, void *ctx)
req_ctx = ctx;
} else {
req_ctx = GF_CALLOC(1, sizeof(*req_ctx), gf_gld_mt_op_allack_ctx_t);
+ if (!req_ctx)
+ goto out;
+ free_req_ctx = _gf_true;
op = glusterd_op_get_op();
req_ctx->op = op;
gf_uuid_copy(req_ctx->uuid, MY_UUID);
@@ -7511,7 +7405,6 @@ glusterd_op_ac_send_brick_op(glusterd_op_sm_event_t *event, void *ctx)
if (op_errstr == NULL)
gf_asprintf(&op_errstr, OPERRSTR_BUILD_PAYLOAD);
opinfo.op_errstr = op_errstr;
- GF_FREE(req_ctx);
goto out;
}
}
@@ -7530,6 +7423,8 @@ glusterd_op_ac_send_brick_op(glusterd_op_sm_event_t *event, void *ctx)
}
out:
+ if (ret && free_req_ctx)
+ GF_FREE(req_ctx);
gf_msg_debug(this->name, 0, "Returning with %d", ret);
return ret;
@@ -7631,11 +7526,6 @@ glusterd_op_bricks_select(glusterd_op_t op, dict_t *dict, char **op_errstr,
ret = glusterd_bricks_select_status_volume(dict, op_errstr,
selected);
break;
- case GD_OP_TIER_STATUS:
- ret = glusterd_bricks_select_tier_volume(dict, op_errstr, selected);
- break;
-
- case GD_OP_DETACH_TIER_STATUS:
case GD_OP_DEFRAG_BRICK_VOLUME:
ret = glusterd_bricks_select_rebalance_volume(dict, op_errstr,
selected);
@@ -8079,9 +7969,12 @@ glusterd_op_sm()
glusterd_op_sm_event_type_t event_type = GD_OP_EVENT_NONE;
xlator_t *this = NULL;
glusterd_op_info_t txn_op_info;
+ glusterd_conf_t *priv = NULL;
this = THIS;
GF_ASSERT(this);
+ priv = this->private;
+ GF_ASSERT(priv);
ret = synclock_trylock(&gd_op_sm_lock);
if (ret) {
@@ -8159,12 +8052,17 @@ glusterd_op_sm()
"Unable to clear "
"transaction's opinfo");
} else {
- ret = glusterd_set_txn_opinfo(&event->txn_id, &opinfo);
- if (ret)
- gf_msg(this->name, GF_LOG_ERROR, 0,
- GD_MSG_TRANS_OPINFO_SET_FAIL,
- "Unable to set "
- "transaction's opinfo");
+ if ((priv->op_version < GD_OP_VERSION_6_0) ||
+ !(event_type == GD_OP_EVENT_STAGE_OP &&
+ opinfo.state.state == GD_OP_STATE_STAGED &&
+ opinfo.skip_locking)) {
+ ret = glusterd_set_txn_opinfo(&event->txn_id, &opinfo);
+ if (ret)
+ gf_msg(this->name, GF_LOG_ERROR, 0,
+ GD_MSG_TRANS_OPINFO_SET_FAIL,
+ "Unable to set "
+ "transaction's opinfo");
+ }
}
glusterd_destroy_op_event_ctx(event);
@@ -8234,13 +8132,11 @@ glusterd_op_free_ctx(glusterd_op_t op, void *ctx)
case GD_OP_PROFILE_VOLUME:
case GD_OP_STATUS_VOLUME:
case GD_OP_REBALANCE:
- case GD_OP_TIER_START_STOP:
case GD_OP_HEAL_VOLUME:
case GD_OP_STATEDUMP_VOLUME:
case GD_OP_CLEARLOCKS_VOLUME:
case GD_OP_DEFRAG_BRICK_VOLUME:
case GD_OP_MAX_OPVERSION:
- case GD_OP_TIER_STATUS:
dict_unref(ctx);
break;
default:
diff --git a/xlators/mgmt/glusterd/src/glusterd-op-sm.h b/xlators/mgmt/glusterd/src/glusterd-op-sm.h
index 4fcaff1c8ba..8a24b16612a 100644
--- a/xlators/mgmt/glusterd/src/glusterd-op-sm.h
+++ b/xlators/mgmt/glusterd/src/glusterd-op-sm.h
@@ -11,13 +11,13 @@
#define _GLUSTERD_OP_SM_H_
#include <pthread.h>
-#include "compat-uuid.h"
+#include <glusterfs/compat-uuid.h>
-#include "glusterfs.h"
-#include "xlator.h"
-#include "logging.h"
-#include "call-stub.h"
-#include "byte-order.h"
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/xlator.h>
+#include <glusterfs/logging.h>
+#include <glusterfs/call-stub.h>
+#include <glusterfs/byte-order.h>
#include "glusterd.h"
#include "protocol-common.h"
#include "glusterd-hooks.h"
@@ -259,9 +259,6 @@ glusterd_op_init_commit_rsp_dict(glusterd_op_t op);
void
glusterd_op_modify_op_ctx(glusterd_op_t op, void *op_ctx);
-void
-glusterd_op_perform_detach_tier(glusterd_volinfo_t *volinfo);
-
int
glusterd_set_detach_bricks(dict_t *dict, glusterd_volinfo_t *volinfo);
@@ -286,10 +283,6 @@ glusterd_stop_bricks(glusterd_volinfo_t *volinfo);
int
glusterd_defrag_volume_node_rsp(dict_t *req_dict, dict_t *rsp_dict,
dict_t *op_ctx);
-#ifdef HAVE_BD_XLATOR
-int
-glusterd_is_valid_vg(glusterd_brickinfo_t *brick, int check_tag, char *msg);
-#endif
int32_t
glusterd_get_txn_opinfo(uuid_t *txn_id, glusterd_op_info_t *opinfo);
@@ -309,6 +302,12 @@ glusterd_set_opinfo(char *errstr, int32_t op_errno, int32_t op_ret);
int
glusterd_dict_set_volid(dict_t *dict, char *volname, char **op_errstr);
-int32_t
-glusterd_tier_op(xlator_t *this, void *data);
+int
+glusterd_op_stats_volume(dict_t *dict, char **op_errstr, dict_t *rsp_dict);
+
+int
+glusterd_op_stage_stats_volume(dict_t *dict, char **op_errstr);
+
+int
+gd_set_commit_hash(dict_t *dict);
#endif
diff --git a/xlators/mgmt/glusterd/src/glusterd-peer-utils.c b/xlators/mgmt/glusterd/src/glusterd-peer-utils.c
index 7d2d28520fc..18d355cb186 100644
--- a/xlators/mgmt/glusterd/src/glusterd-peer-utils.c
+++ b/xlators/mgmt/glusterd/src/glusterd-peer-utils.c
@@ -12,7 +12,7 @@
#include "glusterd-store.h"
#include "glusterd-server-quorum.h"
#include "glusterd-messages.h"
-#include "common-utils.h"
+#include <glusterfs/common-utils.h>
#include "glusterd-utils.h"
void
@@ -48,6 +48,7 @@ glusterd_peerinfo_destroy(struct rcu_head *head)
}
glusterd_sm_tr_log_delete(&peerinfo->sm_log);
+ pthread_mutex_unlock(&peerinfo->delete_lock);
pthread_mutex_destroy(&peerinfo->delete_lock);
GF_FREE(peerinfo);
@@ -81,10 +82,112 @@ glusterd_peerinfo_cleanup(glusterd_peerinfo_t *peerinfo)
call_rcu(&peerinfo->rcu_head.head, glusterd_peerinfo_destroy);
if (quorum_action)
+ /* coverity[SLEEP] */
glusterd_do_quorum_action();
return 0;
}
+/* gd_peerinfo_find_from_hostname walks every address saved for each peer and
+ * compares it with @hoststr. The comparison is case-insensitive and looks at
+ * no more than the first 1024 characters (strncasecmp).
+ *
+ * The peer and hostname lists are traversed between RCU_READ_LOCK and
+ * RCU_READ_UNLOCK; the matched pointer is returned after the unlock.
+ * NOTE(review): whether the returned peer may still be used safely once the
+ * read-side section has ended depends on the callers - confirm.
+ *
+ * Returns the first matching peer if found, else returns NULL (found is
+ * initialised to NULL and is only set on a match).
+ */
+static glusterd_peerinfo_t *
+gd_peerinfo_find_from_hostname(const char *hoststr)
+{
+ xlator_t *this = THIS;
+ glusterd_conf_t *priv = NULL;
+ glusterd_peerinfo_t *peer = NULL;
+ glusterd_peerinfo_t *found = NULL;
+ glusterd_peer_hostname_t *tmphost = NULL;
+
+ GF_ASSERT(this != NULL);
+ priv = this->private;
+ GF_VALIDATE_OR_GOTO(this->name, (priv != NULL), out);
+
+ GF_VALIDATE_OR_GOTO(this->name, (hoststr != NULL), out);
+
+ RCU_READ_LOCK;
+ cds_list_for_each_entry_rcu(peer, &priv->peers, uuid_list)
+ {
+ cds_list_for_each_entry_rcu(tmphost, &peer->hostnames, hostname_list)
+ {
+ if (!strncasecmp(tmphost->hostname, hoststr, 1024)) {
+ gf_msg_debug(this->name, 0, "Friend %s found.. state: %d",
+ tmphost->hostname, peer->state.state);
+ found = peer; /* Probably needs to be
+ dereferenced*/
+ goto unlock;
+ }
+ }
+ }
+unlock:
+ RCU_READ_UNLOCK;
+out:
+ return found;
+}
+
+/* gd_peerinfo_find_from_addrinfo iterates over all the addresses saved for each
+ * peer, resolves each of them with getaddrinfo and compares every resolved
+ * entry to @addr->ai_addr using gf_compare_sockaddr. Only @addr itself is
+ * examined; its ai_next chain is not walked here.
+ *
+ * @addr is dereferenced without a NULL check in this function, so callers
+ * must pass a valid pointer.
+ *
+ * NOTE: As getaddrinfo is a blocking call and is being performed multiple times
+ * in this function (inside the RCU_READ_LOCK / RCU_READ_UNLOCK section), it
+ * could cause the calling thread to be blocked for significant amounts of
+ * time.
+ *
+ * Returns the matched peer if found, else returns NULL.
+ */
+static glusterd_peerinfo_t *
+gd_peerinfo_find_from_addrinfo(const struct addrinfo *addr)
+{
+ xlator_t *this = THIS;
+ glusterd_conf_t *conf = NULL;
+ glusterd_peerinfo_t *peer = NULL;
+ glusterd_peerinfo_t *found = NULL;
+ glusterd_peer_hostname_t *address = NULL;
+ int ret = 0;
+ struct addrinfo *paddr = NULL;
+ struct addrinfo *tmp = NULL;
+
+ GF_ASSERT(this != NULL);
+ conf = this->private;
+ GF_VALIDATE_OR_GOTO(this->name, (conf != NULL), out);
+
+ RCU_READ_LOCK;
+ cds_list_for_each_entry_rcu(peer, &conf->peers, uuid_list)
+ {
+ cds_list_for_each_entry_rcu(address, &peer->hostnames, hostname_list)
+ {
+ /* TODO: Cache the resolved addrinfos to improve
+ * performance
+ */
+ ret = getaddrinfo(address->hostname, NULL, NULL, &paddr);
+ if (ret) {
+ /* Don't fail if getaddrinfo fails; log at trace level
+ * and continue on to the next address
+ */
+ gf_msg_trace(this->name, 0, "getaddrinfo for %s failed (%s)",
+ address->hostname, gai_strerror(ret));
+ continue;
+ }
+
+ for (tmp = paddr; tmp != NULL; tmp = tmp->ai_next) {
+ if (gf_compare_sockaddr(addr->ai_addr, tmp->ai_addr)) {
+ found = peer; /* (de)referenced? */
+ break;
+ }
+ }
+
+ freeaddrinfo(paddr);
+ if (found)
+ goto unlock;
+ }
+ }
+unlock:
+ RCU_READ_UNLOCK;
+out:
+ return found;
+}
+
/* glusterd_peerinfo_find_by_hostname searches for a peer which matches the
* hostname @hoststr and if found returns the pointer to peerinfo object.
* Returns NULL otherwise.
@@ -99,14 +202,11 @@ glusterd_peerinfo_find_by_hostname(const char *hoststr)
int ret = -1;
struct addrinfo *addr = NULL;
struct addrinfo *p = NULL;
- xlator_t *this = NULL;
+ xlator_t *this = THIS;
glusterd_peerinfo_t *peerinfo = NULL;
- this = THIS;
GF_ASSERT(hoststr);
- peerinfo = NULL;
-
peerinfo = gd_peerinfo_find_from_hostname(hoststr);
if (peerinfo)
return peerinfo;
@@ -176,31 +276,33 @@ glusterd_peerinfo_find_by_uuid(uuid_t uuid)
glusterd_conf_t *priv = NULL;
glusterd_peerinfo_t *entry = NULL;
glusterd_peerinfo_t *found = NULL;
- xlator_t *this = NULL;
+ xlator_t *this = THIS;
+ glusterd_friend_sm_state_t state;
- this = THIS;
GF_ASSERT(this);
+ if (gf_uuid_is_null(uuid))
+ return NULL;
+
priv = this->private;
GF_ASSERT(priv);
- if (gf_uuid_is_null(uuid))
- return NULL;
-
- rcu_read_lock();
+ RCU_READ_LOCK;
cds_list_for_each_entry_rcu(entry, &priv->peers, uuid_list)
{
if (!gf_uuid_compare(entry->uuid, uuid)) {
- gf_msg_debug(this->name, 0, "Friend found... state: %s",
- glusterd_friend_sm_state_name_get(entry->state.state));
found = entry; /* Probably should be rcu_dereferenced */
+ state = found->state.state;
break;
}
}
- rcu_read_unlock();
+ RCU_READ_UNLOCK;
- if (!found)
+ if (found)
+ gf_msg_debug(this->name, 0, "Friend found... state: %s",
+ glusterd_friend_sm_state_name_get(state));
+ else
gf_msg_debug(this->name, 0, "Friend with uuid: %s, not found",
uuid_utoa(uuid));
return found;
@@ -214,9 +316,8 @@ glusterd_peerinfo_t *
glusterd_peerinfo_find(uuid_t uuid, const char *hostname)
{
glusterd_peerinfo_t *peerinfo = NULL;
- xlator_t *this = NULL;
+ xlator_t *this = THIS;
- this = THIS;
GF_ASSERT(this);
if (uuid) {
@@ -266,8 +367,10 @@ glusterd_peerinfo_new(glusterd_friend_sm_state_t state, uuid_t *uuid,
GF_ASSERT(conf);
new_peer = GF_CALLOC(1, sizeof(*new_peer), gf_gld_mt_peerinfo_t);
- if (!new_peer)
+ if (!new_peer) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_NO_MEMORY, NULL);
goto out;
+ }
CDS_INIT_LIST_HEAD(&new_peer->uuid_list);
@@ -323,7 +426,7 @@ glusterd_chk_peers_connected_befriended(uuid_t skip_uuid)
priv = THIS->private;
GF_ASSERT(priv);
- rcu_read_lock();
+ RCU_READ_LOCK;
cds_list_for_each_entry_rcu(peerinfo, &priv->peers, uuid_list)
{
if (!gf_uuid_is_null(skip_uuid) &&
@@ -336,7 +439,7 @@ glusterd_chk_peers_connected_befriended(uuid_t skip_uuid)
break;
}
}
- rcu_read_unlock();
+ RCU_READ_UNLOCK;
gf_msg_debug(THIS->name, 0, "Returning %s", (ret ? "TRUE" : "FALSE"));
return ret;
@@ -357,8 +460,9 @@ glusterd_uuid_to_hostname(uuid_t uuid)
if (!gf_uuid_compare(MY_UUID, uuid)) {
hostname = gf_strdup("localhost");
+ return hostname;
}
- rcu_read_lock();
+ RCU_READ_LOCK;
if (!cds_list_empty(&priv->peers)) {
cds_list_for_each_entry_rcu(entry, &priv->peers, uuid_list)
{
@@ -368,7 +472,7 @@ glusterd_uuid_to_hostname(uuid_t uuid)
}
}
}
- rcu_read_unlock();
+ RCU_READ_UNLOCK;
return hostname;
}
@@ -399,15 +503,15 @@ glusterd_are_all_peers_up()
conf = this->private;
GF_VALIDATE_OR_GOTO(this->name, conf, out);
- rcu_read_lock();
+ RCU_READ_LOCK;
cds_list_for_each_entry_rcu(peerinfo, &conf->peers, uuid_list)
{
if (!peerinfo->connected) {
- rcu_read_unlock();
+ RCU_READ_UNLOCK;
goto out;
}
}
- rcu_read_unlock();
+ RCU_READ_UNLOCK;
peers_up = _gf_true;
@@ -428,7 +532,7 @@ glusterd_are_vol_all_peers_up(glusterd_volinfo_t *volinfo,
if (!gf_uuid_compare(brickinfo->uuid, MY_UUID))
continue;
- rcu_read_lock();
+ RCU_READ_LOCK;
cds_list_for_each_entry_rcu(peerinfo, peers, uuid_list)
{
if (gf_uuid_compare(peerinfo->uuid, brickinfo->uuid))
@@ -439,13 +543,12 @@ glusterd_are_vol_all_peers_up(glusterd_volinfo_t *volinfo,
if (!(peerinfo->connected) ||
(peerinfo->state.state != GD_FRIEND_STATE_BEFRIENDED)) {
*down_peerstr = gf_strdup(peerinfo->hostname);
- gf_msg_debug(THIS->name, 0, "Peer %s is down. ",
- peerinfo->hostname);
- rcu_read_unlock();
+ RCU_READ_UNLOCK;
+ gf_msg_debug(THIS->name, 0, "Peer %s is down. ", *down_peerstr);
goto out;
}
}
- rcu_read_unlock();
+ RCU_READ_UNLOCK;
}
ret = _gf_true;
@@ -463,12 +566,16 @@ glusterd_peer_hostname_new(const char *hostname,
GF_ASSERT(hostname);
GF_ASSERT(name);
+ xlator_t *this = THIS;
+ GF_ASSERT(this);
peer_hostname = GF_CALLOC(1, sizeof(*peer_hostname),
gf_gld_mt_peer_hostname_t);
- if (!peer_hostname)
+ if (!peer_hostname) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_NO_MEMORY, NULL);
goto out;
+ }
peer_hostname->hostname = gf_strdup(hostname);
CDS_INIT_LIST_HEAD(&peer_hostname->hostname_list);
@@ -500,7 +607,6 @@ glusterd_peer_hostname_free(glusterd_peer_hostname_t *name)
gf_boolean_t
gd_peer_has_address(glusterd_peerinfo_t *peerinfo, const char *address)
{
- gf_boolean_t ret = _gf_false;
glusterd_peer_hostname_t *hostname = NULL;
GF_VALIDATE_OR_GOTO("glusterd", (peerinfo != NULL), out);
@@ -509,13 +615,12 @@ gd_peer_has_address(glusterd_peerinfo_t *peerinfo, const char *address)
cds_list_for_each_entry(hostname, &peerinfo->hostnames, hostname_list)
{
if (strcmp(hostname->hostname, address) == 0) {
- ret = _gf_true;
- break;
+ return _gf_true;
}
}
out:
- return ret;
+ return _gf_false;
}
int
@@ -624,112 +729,6 @@ out:
return ret;
}
-/* gd_peerinfo_find_from_hostname iterates over all the addresses saved for each
- * peer and matches it to @hoststr.
- * Returns the matched peer if found else returns NULL
- */
-glusterd_peerinfo_t *
-gd_peerinfo_find_from_hostname(const char *hoststr)
-{
- xlator_t *this = NULL;
- glusterd_conf_t *priv = NULL;
- glusterd_peerinfo_t *peer = NULL;
- glusterd_peerinfo_t *found = NULL;
- glusterd_peer_hostname_t *tmphost = NULL;
-
- this = THIS;
- GF_ASSERT(this != NULL);
- priv = this->private;
- GF_VALIDATE_OR_GOTO(this->name, (priv != NULL), out);
-
- GF_VALIDATE_OR_GOTO(this->name, (hoststr != NULL), out);
-
- rcu_read_lock();
- cds_list_for_each_entry_rcu(peer, &priv->peers, uuid_list)
- {
- cds_list_for_each_entry_rcu(tmphost, &peer->hostnames, hostname_list)
- {
- if (!strncasecmp(tmphost->hostname, hoststr, 1024)) {
- gf_msg_debug(this->name, 0, "Friend %s found.. state: %d",
- tmphost->hostname, peer->state.state);
- found = peer; /* Probably needs to be
- dereferenced*/
- goto unlock;
- }
- }
- }
-unlock:
- rcu_read_unlock();
-out:
- return found;
-}
-
-/* gd_peerinfo_find_from_addrinfo iterates over all the addresses saved for each
- * peer, resolves them and compares them to @addr.
- *
- *
- * NOTE: As getaddrinfo is a blocking call and is being performed multiple times
- * in this function, it could lead to the calling thread to be blocked for
- * significant amounts of time.
- *
- * Returns the matched peer if found else returns NULL
- */
-glusterd_peerinfo_t *
-gd_peerinfo_find_from_addrinfo(const struct addrinfo *addr)
-{
- xlator_t *this = NULL;
- glusterd_conf_t *conf = NULL;
- glusterd_peerinfo_t *peer = NULL;
- glusterd_peerinfo_t *found = NULL;
- glusterd_peer_hostname_t *address = NULL;
- int ret = 0;
- struct addrinfo *paddr = NULL;
- struct addrinfo *tmp = NULL;
-
- this = THIS;
- GF_ASSERT(this != NULL);
- conf = this->private;
- GF_VALIDATE_OR_GOTO(this->name, (conf != NULL), out);
-
- GF_VALIDATE_OR_GOTO(this->name, (addr != NULL), out);
-
- rcu_read_lock();
- cds_list_for_each_entry_rcu(peer, &conf->peers, uuid_list)
- {
- cds_list_for_each_entry_rcu(address, &peer->hostnames, hostname_list)
- {
- /* TODO: Cache the resolved addrinfos to improve
- * performance
- */
- ret = getaddrinfo(address->hostname, NULL, NULL, &paddr);
- if (ret) {
- /* Don't fail if getaddrinfo fails, continue
- * onto the next address
- */
- gf_msg_trace(this->name, 0, "getaddrinfo for %s failed (%s)",
- address->hostname, gai_strerror(ret));
- ret = 0;
- continue;
- }
-
- for (tmp = paddr; tmp != NULL; tmp = tmp->ai_next) {
- if (gf_compare_sockaddr(addr->ai_addr, tmp->ai_addr)) {
- found = peer; /* (de)referenced? */
- break;
- }
- }
-
- freeaddrinfo(paddr);
- if (found)
- goto unlock;
- }
- }
-unlock:
- rcu_read_unlock();
-out:
- return found;
-}
-
/* gd_update_peerinfo_from_dict will update the hostnames for @peerinfo from
* peer details with @prefix in @dict.
* Returns 0 on success and -1 on failure.
@@ -830,7 +829,7 @@ gd_peerinfo_from_dict(dict_t *dict, const char *prefix)
xlator_t *this = NULL;
glusterd_conf_t *conf = NULL;
glusterd_peerinfo_t *new_peer = NULL;
- char key[100] = {
+ char key[64] = {
0,
};
char *uuid_str = NULL;
@@ -875,14 +874,14 @@ out:
return new_peer;
}
-int
+static int
gd_add_peer_hostnames_to_dict(glusterd_peerinfo_t *peerinfo, dict_t *dict,
const char *prefix)
{
int ret = -1;
xlator_t *this = NULL;
glusterd_conf_t *conf = NULL;
- char key[256] = {
+ char key[64] = {
0,
};
glusterd_peer_hostname_t *addr = NULL;
@@ -907,8 +906,11 @@ gd_add_peer_hostnames_to_dict(glusterd_peerinfo_t *peerinfo, dict_t *dict,
{
snprintf(key, sizeof(key), "%s.hostname%d", prefix, count);
ret = dict_set_dynstr_with_alloc(dict, key, addr->hostname);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Key=%s", key, NULL);
goto out;
+ }
count++;
}
@@ -924,47 +926,67 @@ gd_add_peer_detail_to_dict(glusterd_peerinfo_t *peerinfo, dict_t *friends,
int count)
{
int ret = -1;
- char key[64] = {
+ char key[32] = {
0,
};
int keylen;
char *peer_uuid_str = NULL;
+ xlator_t *this = THIS;
+ GF_ASSERT(this);
GF_ASSERT(peerinfo);
GF_ASSERT(friends);
peer_uuid_str = gd_peer_uuid_str(peerinfo);
keylen = snprintf(key, sizeof(key), "friend%d.uuid", count);
ret = dict_set_strn(friends, key, keylen, peer_uuid_str);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, "Key=%s",
+ key, NULL);
goto out;
+ }
keylen = snprintf(key, sizeof(key), "friend%d.hostname", count);
ret = dict_set_strn(friends, key, keylen, peerinfo->hostname);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, "Key=%s",
+ key, NULL);
goto out;
+ }
keylen = snprintf(key, sizeof(key), "friend%d.port", count);
ret = dict_set_int32n(friends, key, keylen, peerinfo->port);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, "Key=%s",
+ key, NULL);
goto out;
+ }
keylen = snprintf(key, sizeof(key), "friend%d.stateId", count);
ret = dict_set_int32n(friends, key, keylen, peerinfo->state.state);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Key=%s in dict", key, NULL);
goto out;
+ }
keylen = snprintf(key, sizeof(key), "friend%d.state", count);
ret = dict_set_strn(
friends, key, keylen,
glusterd_friend_sm_state_name_get(peerinfo->state.state));
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, "key=%s",
+ key, NULL);
goto out;
+ }
keylen = snprintf(key, sizeof(key), "friend%d.connected", count);
ret = dict_set_int32n(friends, key, keylen, (int32_t)peerinfo->connected);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, "Key=%s",
+ key, NULL);
goto out;
+ }
snprintf(key, sizeof(key), "friend%d", count);
ret = gd_add_peer_hostnames_to_dict(peerinfo, friends, key);
@@ -983,28 +1005,30 @@ glusterd_peerinfo_find_by_generation(uint32_t generation)
glusterd_conf_t *priv = NULL;
glusterd_peerinfo_t *entry = NULL;
glusterd_peerinfo_t *found = NULL;
- xlator_t *this = NULL;
+ xlator_t *this = THIS;
+ glusterd_friend_sm_state_t state;
- this = THIS;
GF_ASSERT(this);
priv = this->private;
GF_ASSERT(priv);
- rcu_read_lock();
+ RCU_READ_LOCK;
cds_list_for_each_entry_rcu(entry, &priv->peers, uuid_list)
{
if (entry->generation == generation) {
- gf_msg_debug(this->name, 0, "Friend found... state: %s",
- glusterd_friend_sm_state_name_get(entry->state.state));
found = entry; /* Probably should be rcu_dereferenced */
+ state = found->state.state;
break;
}
}
- rcu_read_unlock();
+ RCU_READ_UNLOCK;
- if (!found)
+ if (found)
+ gf_msg_debug(this->name, 0, "Friend found... state: %s",
+ glusterd_friend_sm_state_name_get(state));
+ else
gf_msg_debug(this->name, 0,
"Friend with generation: %" PRIu32 ", not found",
generation);
@@ -1025,9 +1049,9 @@ glusterd_get_peers_count()
conf = this->private;
GF_VALIDATE_OR_GOTO(this->name, conf, out);
- rcu_read_lock();
+ RCU_READ_LOCK;
cds_list_for_each_entry_rcu(peer, &conf->peers, uuid_list) count++;
- rcu_read_unlock();
+ RCU_READ_UNLOCK;
out:
return count;
diff --git a/xlators/mgmt/glusterd/src/glusterd-peer-utils.h b/xlators/mgmt/glusterd/src/glusterd-peer-utils.h
index 47cbf6ee13d..fd254d57391 100644
--- a/xlators/mgmt/glusterd/src/glusterd-peer-utils.h
+++ b/xlators/mgmt/glusterd/src/glusterd-peer-utils.h
@@ -64,12 +64,6 @@ int
gd_add_friend_to_dict(glusterd_peerinfo_t *friend, dict_t *dict,
const char *prefix);
-glusterd_peerinfo_t *
-gd_peerinfo_find_from_hostname(const char *hoststr);
-
-glusterd_peerinfo_t *
-gd_peerinfo_find_from_addrinfo(const struct addrinfo *addr);
-
int
gd_update_peerinfo_from_dict(glusterd_peerinfo_t *peerinfo, dict_t *dict,
const char *prefix);
@@ -78,9 +72,6 @@ glusterd_peerinfo_t *
gd_peerinfo_from_dict(dict_t *dict, const char *prefix);
int
-gd_add_peer_hostnames_to_dict(glusterd_peerinfo_t *peerinfo, dict_t *dict,
- const char *prefix);
-int
gd_add_peer_detail_to_dict(glusterd_peerinfo_t *peerinfo, dict_t *friends,
int count);
glusterd_peerinfo_t *
diff --git a/xlators/mgmt/glusterd/src/glusterd-pmap.c b/xlators/mgmt/glusterd/src/glusterd-pmap.c
index 27d664567cf..16ac628ab82 100644
--- a/xlators/mgmt/glusterd/src/glusterd-pmap.c
+++ b/xlators/mgmt/glusterd/src/glusterd-pmap.c
@@ -8,10 +8,10 @@
cases as published by the Free Software Foundation.
*/
-#include "xlator.h"
-#include "glusterfs.h"
-#include "syscall.h"
-#include "compat-errno.h"
+#include <glusterfs/xlator.h>
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/syscall.h>
+#include <glusterfs/compat-errno.h>
#include "glusterd.h"
#include "glusterd-utils.h"
@@ -433,17 +433,20 @@ __gluster_pmap_portbybrick(rpcsvc_request_t *req)
char *brick = NULL;
int port = 0;
int ret = -1;
+ xlator_t *this = THIS;
+ GF_ASSERT(this);
ret = xdr_to_generic(req->msg[0], &args,
(xdrproc_t)xdr_pmap_port_by_brick_req);
if (ret < 0) {
req->rpc_err = GARBAGE_ARGS;
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_GARBAGE_ARGS, NULL);
goto fail;
}
brick = args.brick;
- port = pmap_registry_search(THIS, brick, GF_PMAP_PORT_BRICKSERVER,
+ port = pmap_registry_search(this, brick, GF_PMAP_PORT_BRICKSERVER,
_gf_false);
if (!port)
@@ -475,11 +478,14 @@ __gluster_pmap_brickbyport(rpcsvc_request_t *req)
0,
};
int ret = -1;
+ xlator_t *this = THIS;
+ GF_ASSERT(this);
ret = xdr_to_generic(req->msg[0], &args,
(xdrproc_t)xdr_pmap_brick_by_port_req);
if (ret < 0) {
req->rpc_err = GARBAGE_ARGS;
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_GARBAGE_ARGS, NULL);
goto fail;
}
@@ -513,10 +519,13 @@ __gluster_pmap_signin(rpcsvc_request_t *req)
};
int ret = -1;
glusterd_brickinfo_t *brickinfo = NULL;
+ xlator_t *this = THIS;
+ GF_ASSERT(this);
ret = xdr_to_generic(req->msg[0], &args, (xdrproc_t)xdr_pmap_signin_req);
if (ret < 0) {
req->rpc_err = GARBAGE_ARGS;
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_GARBAGE_ARGS, NULL);
goto fail;
}
@@ -570,6 +579,7 @@ __gluster_pmap_signout(rpcsvc_request_t *req)
if (ret < 0) {
// failed to decode msg;
req->rpc_err = GARBAGE_ARGS;
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_GARBAGE_ARGS, NULL);
goto fail;
}
rsp.op_ret = pmap_registry_remove(THIS, args.port, args.brick,
@@ -635,16 +645,16 @@ gluster_pmap_signout(rpcsvc_request_t *req)
return glusterd_big_locked_handler(req, __gluster_pmap_signout);
}
-rpcsvc_actor_t gluster_pmap_actors[GF_PMAP_MAXVALUE] = {
- [GF_PMAP_NULL] = {"NULL", GF_PMAP_NULL, NULL, NULL, 0, DRC_NA},
- [GF_PMAP_PORTBYBRICK] = {"PORTBYBRICK", GF_PMAP_PORTBYBRICK,
- gluster_pmap_portbybrick, NULL, 0, DRC_NA},
- [GF_PMAP_BRICKBYPORT] = {"BRICKBYPORT", GF_PMAP_BRICKBYPORT,
- gluster_pmap_brickbyport, NULL, 0, DRC_NA},
- [GF_PMAP_SIGNIN] = {"SIGNIN", GF_PMAP_SIGNIN, gluster_pmap_signin, NULL, 0,
- DRC_NA},
- [GF_PMAP_SIGNOUT] = {"SIGNOUT", GF_PMAP_SIGNOUT, gluster_pmap_signout, NULL,
- 0, DRC_NA},
+static rpcsvc_actor_t gluster_pmap_actors[GF_PMAP_MAXVALUE] = {
+ [GF_PMAP_NULL] = {"NULL", NULL, NULL, GF_PMAP_NULL, DRC_NA, 0},
+ [GF_PMAP_PORTBYBRICK] = {"PORTBYBRICK", gluster_pmap_portbybrick, NULL,
+ GF_PMAP_PORTBYBRICK, DRC_NA, 0},
+ [GF_PMAP_BRICKBYPORT] = {"BRICKBYPORT", gluster_pmap_brickbyport, NULL,
+ GF_PMAP_BRICKBYPORT, DRC_NA, 0},
+ [GF_PMAP_SIGNIN] = {"SIGNIN", gluster_pmap_signin, NULL, GF_PMAP_SIGNIN,
+ DRC_NA, 0},
+ [GF_PMAP_SIGNOUT] = {"SIGNOUT", gluster_pmap_signout, NULL, GF_PMAP_SIGNOUT,
+ DRC_NA, 0},
};
struct rpcsvc_program gluster_pmap_prog = {
diff --git a/xlators/mgmt/glusterd/src/glusterd-pmap.h b/xlators/mgmt/glusterd/src/glusterd-pmap.h
index 8a3ebac48a0..51d75361431 100644
--- a/xlators/mgmt/glusterd/src/glusterd-pmap.h
+++ b/xlators/mgmt/glusterd/src/glusterd-pmap.h
@@ -11,26 +11,26 @@
#define _GLUSTERD_PMAP_H_
#include <pthread.h>
-#include "compat-uuid.h"
+#include <glusterfs/compat-uuid.h>
-#include "glusterfs.h"
-#include "xlator.h"
-#include "logging.h"
-#include "call-stub.h"
-#include "byte-order.h"
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/xlator.h>
+#include <glusterfs/logging.h>
+#include <glusterfs/call-stub.h>
+#include <glusterfs/byte-order.h>
#include "rpcsvc.h"
struct pmap_port_status {
- gf_pmap_port_type_t type;
char *brickname;
void *xprt;
+ gf_pmap_port_type_t type;
};
struct pmap_registry {
+ struct pmap_port_status ports[GF_PORT_MAX + 1];
int base_port;
int max_port;
int last_alloc;
- struct pmap_port_status ports[GF_PORT_MAX + 1];
};
int
diff --git a/xlators/mgmt/glusterd/src/glusterd-proc-mgmt.c b/xlators/mgmt/glusterd/src/glusterd-proc-mgmt.c
index 200e3056117..a05c90d7b10 100644
--- a/xlators/mgmt/glusterd/src/glusterd-proc-mgmt.c
+++ b/xlators/mgmt/glusterd/src/glusterd-proc-mgmt.c
@@ -14,9 +14,9 @@
#include "glusterd.h"
#include "glusterd-utils.h"
-#include "common-utils.h"
-#include "xlator.h"
-#include "logging.h"
+#include <glusterfs/common-utils.h>
+#include <glusterfs/xlator.h>
+#include <glusterfs/logging.h>
#include "glusterd-messages.h"
#include "glusterd-proc-mgmt.h"
@@ -107,12 +107,14 @@ glusterd_proc_stop(glusterd_proc_t *proc, int sig, int flags)
"service, reason:%s",
proc->name, strerror(errno));
}
+ } else {
+ (void)glusterd_unlink_file(proc->pidfile);
}
if (flags != PROC_STOP_FORCE)
goto out;
synclock_unlock(&conf->big_lock);
- sleep(1);
+ synctask_sleep(1);
synclock_lock(&conf->big_lock);
if (gf_is_service_running(proc->pidfile, &pid)) {
ret = kill(pid, SIGKILL);
diff --git a/xlators/mgmt/glusterd/src/glusterd-quota.c b/xlators/mgmt/glusterd/src/glusterd-quota.c
index fd500ca606e..8370c174ce3 100644
--- a/xlators/mgmt/glusterd/src/glusterd-quota.c
+++ b/xlators/mgmt/glusterd/src/glusterd-quota.c
@@ -7,22 +7,21 @@
later), or the GNU General Public License, version 2 (GPLv2), in all
cases as published by the Free Software Foundation.
*/
-#include "common-utils.h"
+#include <glusterfs/common-utils.h>
#include "cli1-xdr.h"
#include "xdr-generic.h"
#include "glusterd.h"
#include "glusterd-op-sm.h"
#include "glusterd-store.h"
#include "glusterd-utils.h"
-#include "glusterd-nfs-svc.h"
#include "glusterd-quotad-svc.h"
#include "glusterd-volgen.h"
#include "glusterd-messages.h"
-#include "run.h"
-#include "syscall.h"
-#include "byte-order.h"
-#include "compat-errno.h"
-#include "quota-common-utils.h"
+#include <glusterfs/run.h>
+#include <glusterfs/syscall.h>
+#include <glusterfs/byte-order.h>
+#include <glusterfs/compat-errno.h>
+#include <glusterfs/quota-common-utils.h>
#include "glusterd-quota.h"
#include <sys/wait.h>
@@ -40,6 +39,49 @@
/* Any negative pid to make it special client */
#define QUOTA_CRAWL_PID "-100"
+#define GLUSTERFS_GET_QUOTA_LIMIT_MOUNT_PIDFILE(pidfile, volname) \
+ { \
+ snprintf(pidfile, PATH_MAX - 1, \
+ DEFAULT_VAR_RUN_DIRECTORY "/%s_quota_limit.pid", volname); \
+ }
+
+#define GLUSTERFS_GET_QUOTA_LIST_MOUNT_PIDFILE(pidfile, volname) \
+ { \
+ snprintf(pidfile, PATH_MAX - 1, \
+ DEFAULT_VAR_RUN_DIRECTORY "/%s_quota_list.pid", volname); \
+ }
+
+#define GLUSTERD_GET_QUOTA_CRAWL_PIDDIR(piddir, volinfo, type) \
+ do { \
+ char _volpath[PATH_MAX] = { \
+ 0, \
+ }; \
+ int32_t _crawl_pid_len; \
+ GLUSTERD_GET_VOLUME_DIR(_volpath, volinfo, priv); \
+ if (type == GF_QUOTA_OPTION_TYPE_ENABLE || \
+ type == GF_QUOTA_OPTION_TYPE_ENABLE_OBJECTS) \
+ _crawl_pid_len = snprintf(piddir, PATH_MAX, "%s/run/quota/enable", \
+ _volpath); \
+ else \
+ _crawl_pid_len = snprintf(piddir, PATH_MAX, \
+ "%s/run/quota/disable", _volpath); \
+ if ((_crawl_pid_len < 0) || (_crawl_pid_len >= PATH_MAX)) { \
+ piddir[0] = 0; \
+ } \
+ } while (0)
+
+#define GLUSTERD_GET_TMP_PATH(abspath, path) \
+ do { \
+ snprintf(abspath, sizeof(abspath) - 1, \
+ DEFAULT_VAR_RUN_DIRECTORY "/tmp%s", path); \
+ } while (0)
+
+#define GLUSTERD_GET_QUOTA_LIST_MOUNT_PATH(abspath, volname, path) \
+ do { \
+ snprintf(abspath, sizeof(abspath) - 1, \
+ DEFAULT_VAR_RUN_DIRECTORY "/%s_quota_list%s", volname, path); \
+ } while (0)
+
const char *gd_quota_op_list[GF_QUOTA_OPTION_TYPE_MAX + 1] = {
[GF_QUOTA_OPTION_TYPE_NONE] = "none",
[GF_QUOTA_OPTION_TYPE_ENABLE] = "enable",
@@ -266,7 +308,7 @@ _glusterd_quota_initiate_fs_crawl(glusterd_conf_t *priv,
GF_VALIDATE_OR_GOTO("glusterd", THIS, out);
GLUSTERD_GET_TMP_PATH(mountdir, "/");
- ret = sys_mkdir(mountdir, 0777);
+ ret = sys_mkdir(mountdir, 0755);
if (ret && errno != EEXIST) {
gf_msg(THIS->name, GF_LOG_WARNING, errno, GD_MSG_MOUNT_REQ_FAIL,
"failed to create temporary "
@@ -436,8 +478,9 @@ glusterd_stop_all_quota_crawl_service(glusterd_conf_t *priv,
if (dir == NULL)
return;
- GF_SKIP_IRRELEVANT_ENTRIES(entry, dir, scratch);
- while (entry) {
+ while ((entry = sys_readdir(dir, scratch))) {
+ if (gf_irrelevant_entry(entry))
+ continue;
len = snprintf(pidfile, sizeof(pidfile), "%s/%s", pid_dir,
entry->d_name);
if ((len >= 0) && (len < sizeof(pidfile))) {
@@ -445,8 +488,6 @@ glusterd_stop_all_quota_crawl_service(glusterd_conf_t *priv,
_gf_true);
sys_unlink(pidfile);
}
-
- GF_SKIP_IRRELEVANT_ENTRIES(entry, dir, scratch);
}
sys_closedir(dir);
}
@@ -470,7 +511,7 @@ glusterd_quota_initiate_fs_crawl(glusterd_conf_t *priv,
goto out;
}
- ret = mkdir_p(DEFAULT_QUOTA_CRAWL_LOG_DIRECTORY, 0777, _gf_true);
+ ret = mkdir_p(DEFAULT_QUOTA_CRAWL_LOG_DIRECTORY, 0755, _gf_true);
if (ret) {
gf_msg(THIS->name, GF_LOG_ERROR, errno, GD_MSG_GLUSTERD_OP_FAILED,
"failed to create dir %s: %s", DEFAULT_QUOTA_CRAWL_LOG_DIRECTORY,
@@ -479,7 +520,7 @@ glusterd_quota_initiate_fs_crawl(glusterd_conf_t *priv,
}
GLUSTERD_GET_QUOTA_CRAWL_PIDDIR(pid_dir, volinfo, type);
- ret = mkdir_p(pid_dir, 0777, _gf_true);
+ ret = mkdir_p(pid_dir, 0755, _gf_true);
if (ret) {
gf_msg(THIS->name, GF_LOG_ERROR, errno, GD_MSG_GLUSTERD_OP_FAILED,
"failed to create dir %s: %s", pid_dir, strerror(errno));
@@ -770,6 +811,7 @@ glusterd_set_quota_limit(char *volname, char *path, char *hard_limit,
0,
};
double soft_limit_double = 0;
+ int64_t local_hl = 0;
this = THIS;
GF_ASSERT(this);
@@ -819,11 +861,11 @@ glusterd_set_quota_limit(char *volname, char *path, char *hard_limit,
new_limit.sl = hton64(new_limit.sl);
- ret = gf_string2bytesize_int64(hard_limit, &new_limit.hl);
+ ret = gf_string2bytesize_int64(hard_limit, &local_hl);
if (ret)
goto out;
- new_limit.hl = hton64(new_limit.hl);
+ new_limit.hl = hton64(local_hl);
ret = sys_lsetxattr(abspath, key, (char *)(void *)&new_limit,
sizeof(new_limit), 0);
@@ -1765,10 +1807,12 @@ glusterd_op_quota(dict_t *dict, char **op_errstr, dict_t *rsp_dict)
goto out;
}
+#if BUILD_GNFS
if (GLUSTERD_STATUS_STARTED == volinfo->status) {
if (priv->op_version == GD_OP_VERSION_MIN)
(void)priv->nfs_svc.manager(&(priv->nfs_svc), NULL, 0);
}
+#endif
if (rsp_dict && start_crawl == _gf_true)
glusterd_quota_initiate_fs_crawl(priv, volinfo, type);
@@ -1855,10 +1899,9 @@ glusterd_get_gfid_from_brick(dict_t *dict, glusterd_volinfo_t *volinfo,
}
ret = sys_lgetxattr(backend_path, GFID_XATTR_KEY, gfid, 16);
if (ret < 0) {
- gf_msg(this->name, GF_LOG_INFO, errno, GD_MSG_SETXATTR_FAIL,
- "Failed to get "
- "extended attribute %s for directory %s. ",
- GFID_XATTR_KEY, backend_path);
+ gf_smsg(this->name, GF_LOG_INFO, errno, GD_MSG_GET_XATTR_FAIL,
+ "Attribute=%s, Directory=%s", GFID_XATTR_KEY, backend_path,
+ NULL);
ret = 0;
continue;
}
@@ -1994,7 +2037,7 @@ glusterd_create_quota_auxiliary_mount(xlator_t *this, char *volname, int type)
fclose(file);
}
- ret = sys_mkdir(mountdir, 0777);
+ ret = sys_mkdir(mountdir, 0755);
if (ret && errno != EEXIST) {
gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_MOUNT_REQ_FAIL,
"Failed to create auxiliary "
@@ -2002,8 +2045,8 @@ glusterd_create_quota_auxiliary_mount(xlator_t *this, char *volname, int type)
mountdir);
goto out;
}
- snprintf(logfile, PATH_MAX - 1, "%s/quota-mount-%s.log",
- DEFAULT_LOG_FILE_DIRECTORY, volname);
+ snprintf(logfile, PATH_MAX - 1, "%s/quota-mount-%s.log", priv->logdir,
+ volname);
snprintf(qpid, 15, "%d", GF_CLIENT_PID_QUOTA_MOUNT);
if (dict_get_strn(this->options, "transport.socket.bind-address",
@@ -2050,7 +2093,6 @@ glusterd_op_stage_quota(dict_t *dict, char **op_errstr, dict_t *rsp_dict)
{
int ret = 0;
char *volname = NULL;
- gf_boolean_t exists = _gf_false;
int type = 0;
xlator_t *this = NULL;
glusterd_conf_t *priv = NULL;
@@ -2074,12 +2116,6 @@ glusterd_op_stage_quota(dict_t *dict, char **op_errstr, dict_t *rsp_dict)
goto out;
}
- exists = glusterd_check_volume_exists(volname);
- if (!exists) {
- gf_asprintf(op_errstr, FMTSTR_CHECK_VOL_EXISTS, volname);
- ret = -1;
- goto out;
- }
ret = glusterd_volinfo_find(volname, &volinfo);
if (ret) {
gf_asprintf(op_errstr, FMTSTR_CHECK_VOL_EXISTS, volname);
diff --git a/xlators/mgmt/glusterd/src/glusterd-quotad-svc.c b/xlators/mgmt/glusterd/src/glusterd-quotad-svc.c
index d93240e230a..f26d832a06d 100644
--- a/xlators/mgmt/glusterd/src/glusterd-quotad-svc.c
+++ b/xlators/mgmt/glusterd/src/glusterd-quotad-svc.c
@@ -8,8 +8,8 @@
cases as published by the Free Software Foundation.
*/
-#include "globals.h"
-#include "run.h"
+#include <glusterfs/globals.h>
+#include <glusterfs/run.h>
#include "glusterd.h"
#include "glusterd-utils.h"
#include "glusterd-volgen.h"
@@ -127,8 +127,10 @@ glusterd_quotadsvc_start(glusterd_svc_t *svc, int flags)
char *options[] = {svc->name, "--process-name", NULL};
cmdline = dict_new();
- if (!cmdline)
+ if (!cmdline) {
+ gf_smsg(THIS->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL);
goto out;
+ }
for (i = 0; options[i]; i++) {
ret = snprintf(key, sizeof(key), "arg%d", i);
diff --git a/xlators/mgmt/glusterd/src/glusterd-rcu.h b/xlators/mgmt/glusterd/src/glusterd-rcu.h
index 32ac3bbfd4e..c85f9bea8f8 100644
--- a/xlators/mgmt/glusterd/src/glusterd-rcu.h
+++ b/xlators/mgmt/glusterd/src/glusterd-rcu.h
@@ -21,7 +21,7 @@
#include "rculist-extra.h"
#endif
-#include "xlator.h"
+#include <glusterfs/xlator.h>
/* gd_rcu_head is a composite struct, composed of struct rcu_head and a this
* pointer, which is used to pass the THIS pointer to call_rcu callbacks.
diff --git a/xlators/mgmt/glusterd/src/glusterd-rebalance.c b/xlators/mgmt/glusterd/src/glusterd-rebalance.c
index f90d3de9843..458bf168ede 100644
--- a/xlators/mgmt/glusterd/src/glusterd-rebalance.c
+++ b/xlators/mgmt/glusterd/src/glusterd-rebalance.c
@@ -13,26 +13,45 @@
#include <sys/resource.h>
#include <sys/statvfs.h>
-#include "compat.h"
+#include <glusterfs/compat.h>
#include "protocol-common.h"
-#include "xlator.h"
-#include "logging.h"
-#include "timer.h"
+#include <glusterfs/xlator.h>
+#include <glusterfs/logging.h>
+#include <glusterfs/timer.h>
#include "glusterd-mem-types.h"
#include "glusterd.h"
#include "glusterd-sm.h"
#include "glusterd-op-sm.h"
#include "glusterd-utils.h"
+#include "glusterd-mgmt.h"
#include "glusterd-messages.h"
#include "glusterd-store.h"
-#include "run.h"
+#include <glusterfs/run.h>
#include "glusterd-volgen.h"
#include "glusterd-messages.h"
-#include "syscall.h"
+#include <glusterfs/syscall.h>
#include "cli1-xdr.h"
#include "xdr-generic.h"
+#define GLUSTERD_GET_DEFRAG_SOCK_FILE(path, volinfo) \
+ do { \
+ int32_t _defrag_sockfile_len; \
+ char tmppath[PATH_MAX] = { \
+ 0, \
+ }; \
+ _defrag_sockfile_len = snprintf( \
+ tmppath, PATH_MAX, \
+ DEFAULT_VAR_RUN_DIRECTORY "/gluster-%s-%s-%s.sock", "rebalance", \
+ volinfo->volname, uuid_utoa(MY_UUID)); \
+ if ((_defrag_sockfile_len < 0) || \
+ (_defrag_sockfile_len >= PATH_MAX)) { \
+ path[0] = 0; \
+ } else { \
+ glusterd_set_socket_filepath(tmppath, path, sizeof(path)); \
+ } \
+ } while (0)
+
int32_t
glusterd_brick_op_cbk(struct rpc_req *req, struct iovec *iov, int count,
void *myframe);
@@ -200,6 +219,9 @@ glusterd_handle_defrag_start(glusterd_volinfo_t *volinfo, char *op_errstr,
char valgrind_logfile[PATH_MAX] = {
0,
};
+ char msg[1024] = {
+ 0,
+ };
char *volfileserver = NULL;
char *localtime_logging = NULL;
@@ -236,7 +258,7 @@ glusterd_handle_defrag_start(glusterd_volinfo_t *volinfo, char *op_errstr,
glusterd_store_perform_node_state_store(volinfo);
GLUSTERD_GET_DEFRAG_DIR(defrag_path, volinfo, priv);
- ret = mkdir_p(defrag_path, 0777, _gf_true);
+ ret = mkdir_p(defrag_path, 0755, _gf_true);
if (ret) {
gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_CREATE_DIR_FAILED,
"Failed to create "
@@ -247,17 +269,21 @@ glusterd_handle_defrag_start(glusterd_volinfo_t *volinfo, char *op_errstr,
GLUSTERD_GET_DEFRAG_SOCK_FILE(sockfile, volinfo);
GLUSTERD_GET_DEFRAG_PID_FILE(pidfile, volinfo, priv);
- snprintf(logfile, PATH_MAX, "%s/%s-%s.log", DEFAULT_LOG_FILE_DIRECTORY,
- volinfo->volname,
- (cmd == GF_DEFRAG_CMD_START_TIER ? "tier" : "rebalance"));
+ snprintf(logfile, PATH_MAX, "%s/%s-%s.log", priv->logdir, volinfo->volname,
+ "rebalance");
runinit(&runner);
- if (this->ctx->cmd_args.valgrind) {
+ if (this->ctx->cmd_args.vgtool != _gf_none) {
snprintf(valgrind_logfile, PATH_MAX, "%s/valgrind-%s-rebalance.log",
- DEFAULT_LOG_FILE_DIRECTORY, volinfo->volname);
+ priv->logdir, volinfo->volname);
+
+ if (this->ctx->cmd_args.vgtool == _gf_memcheck)
+ runner_add_args(&runner, "valgrind", "--leak-check=full",
+ "--trace-children=yes", "--track-origins=yes",
+ NULL);
+ else
+ runner_add_args(&runner, "valgrind", "--tool=drd", NULL);
- runner_add_args(&runner, "valgrind", "--leak-check=full",
- "--trace-children=yes", "--track-origins=yes", NULL);
runner_argprintf(&runner, "--log-file=%s", valgrind_logfile);
}
@@ -265,18 +291,7 @@ glusterd_handle_defrag_start(glusterd_volinfo_t *volinfo, char *op_errstr,
if (dict_get_strn(this->options, "transport.socket.bind-address",
SLEN("transport.socket.bind-address"),
- &volfileserver) == 0) {
- /*In the case of running multiple glusterds on a single machine,
- *we should ensure that log file and unix socket file should be
- *unique in given cluster */
-
- GLUSTERD_GET_DEFRAG_SOCK_FILE_OLD(sockfile, volinfo, priv);
- snprintf(logfile, PATH_MAX, "%s/%s-%s-%s.log",
- DEFAULT_LOG_FILE_DIRECTORY, volinfo->volname,
- (cmd == GF_DEFRAG_CMD_START_TIER ? "tier" : "rebalance"),
- uuid_utoa(MY_UUID));
-
- } else {
+ &volfileserver) != 0) {
volfileserver = "localhost";
}
@@ -287,11 +302,6 @@ glusterd_handle_defrag_start(glusterd_volinfo_t *volinfo, char *op_errstr,
"*dht.assert-no-child-down=yes", "--xlator-option",
"*dht.readdir-optimize=on", "--process-name", "rebalance", NULL);
- if (volinfo->type == GF_CLUSTER_TYPE_TIER) {
- runner_add_arg(&runner, "--xlator-option");
- runner_argprintf(&runner, "*tier-dht.xattr-name=trusted.tier.tier-dht");
- }
-
runner_add_arg(&runner, "--xlator-option");
runner_argprintf(&runner, "*dht.rebalance-cmd=%d", cmd);
runner_add_arg(&runner, "--xlator-option");
@@ -314,6 +324,10 @@ glusterd_handle_defrag_start(glusterd_volinfo_t *volinfo, char *op_errstr,
runner_add_arg(&runner, "--localtime-logging");
}
+ snprintf(msg, sizeof(msg), "Starting the rebalance service for volume %s",
+ volinfo->volname);
+ runner_log(&runner, this->name, GF_LOG_DEBUG, msg);
+
ret = runner_run_nowait(&runner);
if (ret) {
gf_msg_debug("glusterd", 0, "rebalance command failed");
@@ -377,9 +391,6 @@ glusterd_rebalance_rpc_create(glusterd_volinfo_t *volinfo)
glusterd_defrag_info_t *defrag = volinfo->rebal.defrag;
glusterd_conf_t *priv = NULL;
xlator_t *this = NULL;
- struct stat buf = {
- 0,
- };
this = THIS;
GF_ASSERT(this);
@@ -390,36 +401,20 @@ glusterd_rebalance_rpc_create(glusterd_volinfo_t *volinfo)
if (!defrag)
goto out;
- GLUSTERD_GET_DEFRAG_SOCK_FILE(sockfile, volinfo);
- /* Check if defrag sockfile exists in the new location
- * in /var/run/ , if it does not try the old location
- */
- ret = sys_stat(sockfile, &buf);
- /* TODO: Remove this once we don't need backward compatibility
- * with the older path
- */
- if (ret && (errno == ENOENT)) {
- gf_msg(this->name, GF_LOG_WARNING, errno, GD_MSG_FILE_OP_FAILED,
- "Rebalance sockfile "
- "%s does not exist. Trying old path.",
- sockfile);
- GLUSTERD_GET_DEFRAG_SOCK_FILE_OLD(sockfile, volinfo, priv);
- ret = sys_stat(sockfile, &buf);
- if (ret && (ENOENT == errno)) {
- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_REBAL_NO_SOCK_FILE,
- "Rebalance "
- "sockfile %s does not exist",
- sockfile);
- goto out;
- }
+ options = dict_new();
+ if (!options) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL);
+ goto out;
}
+ GLUSTERD_GET_DEFRAG_SOCK_FILE(sockfile, volinfo);
+
/* Setting frame-timeout to 10mins (600seconds).
* Unix domain sockets ensures that the connection is reliable. The
* default timeout of 30mins used for unreliable network connections is
* too long for unix domain socket connections.
*/
- ret = rpc_transport_unix_options_build(&options, sockfile, 600);
+ ret = rpc_transport_unix_options_build(options, sockfile, 600);
if (ret) {
gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_UNIX_OP_BUILD_FAIL,
"Unix options build failed");
@@ -436,6 +431,8 @@ glusterd_rebalance_rpc_create(glusterd_volinfo_t *volinfo)
}
ret = 0;
out:
+ if (options)
+ dict_unref(options);
return ret;
}
@@ -479,18 +476,6 @@ glusterd_rebalance_cmd_validate(int cmd, char *volname,
goto out;
}
- ret = glusterd_disallow_op_for_tier(*volinfo, GD_OP_REBALANCE, cmd);
- if (ret) {
- gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_REBALANCE_CMD_IN_TIER_VOL,
- "Received rebalance command "
- "on Tier volume %s",
- volname);
- snprintf(op_errstr, len,
- "Rebalance operations are not "
- "supported on a tiered volume");
- goto out;
- }
-
ret = 0;
out:
@@ -506,6 +491,7 @@ __glusterd_handle_defrag_volume(rpcsvc_request_t *req)
0,
}};
glusterd_conf_t *priv = NULL;
+ int32_t op = GD_OP_NONE;
dict_t *dict = NULL;
char *volname = NULL;
gf_cli_defrag_type cmd = 0;
@@ -525,6 +511,7 @@ __glusterd_handle_defrag_volume(rpcsvc_request_t *req)
if (ret < 0) {
// failed to decode msg;
req->rpc_err = GARBAGE_ARGS;
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_GARBAGE_ARGS, NULL);
goto out;
}
@@ -564,19 +551,25 @@ __glusterd_handle_defrag_volume(rpcsvc_request_t *req)
if (ret)
goto out;
- if ((cmd == GF_DEFRAG_CMD_STATUS) || (cmd == GF_DEFRAG_CMD_STATUS_TIER) ||
- (cmd == GF_DEFRAG_CMD_STOP_DETACH_TIER) ||
- (cmd == GF_DEFRAG_CMD_STOP) || (cmd == GF_DEFRAG_CMD_DETACH_STATUS)) {
- ret = glusterd_op_begin(req, GD_OP_DEFRAG_BRICK_VOLUME, dict, msg,
- sizeof(msg));
+ if ((cmd == GF_DEFRAG_CMD_STATUS) || (cmd == GF_DEFRAG_CMD_STOP)) {
+ op = GD_OP_DEFRAG_BRICK_VOLUME;
} else
- ret = glusterd_op_begin(req, GD_OP_REBALANCE, dict, msg, sizeof(msg));
+ op = GD_OP_REBALANCE;
+ if (priv->op_version < GD_OP_VERSION_6_0) {
+ gf_msg_debug(this->name, 0,
+ "The cluster is operating at "
+ "version less than %d. Falling back "
+ "to op-sm framework.",
+ GD_OP_VERSION_6_0);
+ ret = glusterd_op_begin(req, op, dict, msg, sizeof(msg));
+ glusterd_friend_sm();
+ glusterd_op_sm();
+ } else {
+ ret = glusterd_mgmt_v3_initiate_all_phases_with_brickop_phase(req, op,
+ dict);
+ }
out:
-
- glusterd_friend_sm();
- glusterd_op_sm();
-
if (ret) {
if (msg[0] == '\0')
snprintf(msg, sizeof(msg), "Operation failed");
@@ -585,8 +578,8 @@ out:
}
free(cli_req.dict.dict_val); // malloced by xdr
-
- return 0;
+ gf_msg_debug(this->name, 0, "Returning %d", ret);
+ return ret;
}
int
@@ -629,7 +622,98 @@ glusterd_brick_validation(dict_t *dict, char *key, data_t *value, void *data)
}
int
-glusterd_op_stage_rebalance(dict_t *dict, char **op_errstr)
+glusterd_set_rebalance_id_in_rsp_dict(dict_t *req_dict, dict_t *rsp_dict)
+{
+ int ret = -1;
+ int32_t cmd = 0;
+ char *volname = NULL;
+ glusterd_volinfo_t *volinfo = NULL;
+ char msg[2048] = {0};
+ char *task_id_str = NULL;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+
+ GF_ASSERT(rsp_dict);
+ GF_ASSERT(req_dict);
+
+ ret = dict_get_strn(rsp_dict, "volname", SLEN("volname"), &volname);
+ if (ret) {
+ gf_msg_debug(this->name, 0, "volname not found");
+ goto out;
+ }
+
+ ret = dict_get_int32n(rsp_dict, "rebalance-command",
+ SLEN("rebalance-command"), &cmd);
+ if (ret) {
+ gf_msg_debug(this->name, 0, "cmd not found");
+ goto out;
+ }
+
+ ret = glusterd_rebalance_cmd_validate(cmd, volname, &volinfo, msg,
+ sizeof(msg));
+ if (ret) {
+ gf_msg_debug(this->name, 0, "failed to validate");
+ goto out;
+ }
+
+ /* rebalance id is generated in glusterd_mgmt_v3_op_stage_rebalance(), but
+ * rsp_dict is unavailable there. So copy it from req_dict to rsp_dict
+ * here, so that the cli can display the rebalance id.*/
+ if ((cmd == GF_DEFRAG_CMD_START) ||
+ (cmd == GF_DEFRAG_CMD_START_LAYOUT_FIX) ||
+ (cmd == GF_DEFRAG_CMD_START_FORCE)) {
+ if (is_origin_glusterd(rsp_dict)) {
+ ret = dict_get_strn(req_dict, GF_REBALANCE_TID_KEY,
+ SLEN(GF_REBALANCE_TID_KEY), &task_id_str);
+ if (ret) {
+ snprintf(msg, sizeof(msg), "Missing rebalance-id");
+ gf_msg(this->name, GF_LOG_WARNING, 0,
+ GD_MSG_REBALANCE_ID_MISSING, "%s", msg);
+ ret = 0;
+ } else {
+ gf_uuid_parse(task_id_str, volinfo->rebal.rebalance_id);
+ ret = glusterd_copy_uuid_to_dict(volinfo->rebal.rebalance_id,
+ rsp_dict, GF_REBALANCE_TID_KEY,
+ SLEN(GF_REBALANCE_TID_KEY));
+ if (ret) {
+ snprintf(msg, sizeof(msg),
+ "Failed to set rebalance id for volume %s",
+ volname);
+ gf_msg(this->name, GF_LOG_WARNING, 0,
+ GD_MSG_DICT_SET_FAILED, "%s", msg);
+ }
+ }
+ }
+ }
+
+ /* Set task-id, if available, in rsp_dict for operations other than
+ * start. This is needed when we want rebalance id in xml output
+ */
+ if (cmd == GF_DEFRAG_CMD_STATUS || cmd == GF_DEFRAG_CMD_STOP) {
+ if (!gf_uuid_is_null(volinfo->rebal.rebalance_id)) {
+ if (GD_OP_REMOVE_BRICK == volinfo->rebal.op)
+ ret = glusterd_copy_uuid_to_dict(
+ volinfo->rebal.rebalance_id, rsp_dict,
+ GF_REMOVE_BRICK_TID_KEY, SLEN(GF_REMOVE_BRICK_TID_KEY));
+ else
+ ret = glusterd_copy_uuid_to_dict(volinfo->rebal.rebalance_id,
+ rsp_dict, GF_REBALANCE_TID_KEY,
+ SLEN(GF_REBALANCE_TID_KEY));
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Failed to set task-id for volume %s", volname);
+ goto out;
+ }
+ }
+ }
+out:
+ return ret;
+}
+
+int
+glusterd_mgmt_v3_op_stage_rebalance(dict_t *dict, char **op_errstr)
{
char *volname = NULL;
char *cmd_str = NULL;
@@ -638,9 +722,7 @@ glusterd_op_stage_rebalance(dict_t *dict, char **op_errstr)
char msg[2048] = {0};
glusterd_volinfo_t *volinfo = NULL;
char *task_id_str = NULL;
- dict_t *op_ctx = NULL;
xlator_t *this = 0;
- int32_t is_force = 0;
this = THIS;
GF_ASSERT(this);
@@ -665,28 +747,316 @@ glusterd_op_stage_rebalance(dict_t *dict, char **op_errstr)
goto out;
}
switch (cmd) {
- case GF_DEFRAG_CMD_START_TIER:
+ case GF_DEFRAG_CMD_START:
+ case GF_DEFRAG_CMD_START_LAYOUT_FIX:
+ /* Check if the connected clients are all of version
+ * glusterfs-3.6 and higher. This is needed to prevent some data
+ * loss issues that could occur when older clients are connected
+ * when rebalance is run. This check can be bypassed by using
+ * 'force'
+ */
+ ret = glusterd_check_client_op_version_support(
+ volname, GD_OP_VERSION_3_6_0, NULL);
+ if (ret) {
+ ret = gf_asprintf(op_errstr,
+ "Volume %s has one or "
+ "more connected clients of a version"
+ " lower than GlusterFS-v3.6.0. "
+ "Starting rebalance in this state "
+ "could lead to data loss.\nPlease "
+ "disconnect those clients before "
+ "attempting this command again.",
+ volname);
+ goto out;
+ }
+ /* Fall through */
+ case GF_DEFRAG_CMD_START_FORCE:
+ if (is_origin_glusterd(dict)) {
+ ret = glusterd_generate_and_set_task_id(
+ dict, GF_REBALANCE_TID_KEY, SLEN(GF_REBALANCE_TID_KEY));
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_TASKID_GEN_FAIL,
+ "Failed to generate task-id");
+ goto out;
+ }
+ } else {
+ ret = dict_get_strn(dict, GF_REBALANCE_TID_KEY,
+ SLEN(GF_REBALANCE_TID_KEY), &task_id_str);
+ if (ret) {
+ snprintf(msg, sizeof(msg), "Missing rebalance-id");
+ gf_msg(this->name, GF_LOG_WARNING, 0,
+ GD_MSG_REBALANCE_ID_MISSING, "%s", msg);
+ ret = 0;
+ }
+ }
+ ret = glusterd_defrag_start_validate(volinfo, msg, sizeof(msg),
+ GD_OP_REBALANCE);
+ if (ret) {
+ gf_msg_debug(this->name, 0,
+ "defrag start validate "
+ "failed for volume %s.",
+ volinfo->volname);
+ goto out;
+ }
+ break;
+ case GF_DEFRAG_CMD_STATUS:
+ case GF_DEFRAG_CMD_STOP:
+
+ ret = dict_get_strn(dict, "cmd-str", SLEN("cmd-str"), &cmd_str);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Failed to get "
+ "command string");
+ ret = -1;
+ goto out;
+ }
+ if ((strstr(cmd_str, "rebalance") != NULL) &&
+ (volinfo->rebal.op != GD_OP_REBALANCE)) {
+ snprintf(msg, sizeof(msg),
+ "Rebalance not started "
+ "for volume %s.",
+ volinfo->volname);
+ ret = -1;
+ goto out;
+ }
+
+ if (strstr(cmd_str, "remove-brick") != NULL) {
+ if (volinfo->rebal.op != GD_OP_REMOVE_BRICK) {
+ snprintf(msg, sizeof(msg),
+ "remove-brick not "
+ "started for volume %s.",
+ volinfo->volname);
+ ret = -1;
+ goto out;
+ }
+
+ /* For remove-brick status/stop command check whether
+ * given input brick is part of volume or not.*/
+
+ ret = dict_foreach_fnmatch(dict, "brick*",
+ glusterd_brick_validation, volinfo);
+ if (ret == -1) {
+ snprintf(msg, sizeof(msg),
+ "Incorrect brick"
+ " for volume %s",
+ volinfo->volname);
+ goto out;
+ }
+ }
+ break;
+
+ default:
+ break;
+ }
+
+ ret = 0;
+out:
+ if (ret && op_errstr && msg[0])
+ *op_errstr = gf_strdup(msg);
+
+ return ret;
+}
+
+int
+glusterd_mgmt_v3_op_rebalance(dict_t *dict, char **op_errstr, dict_t *rsp_dict)
+{
+ char *volname = NULL;
+ int ret = 0;
+ int32_t cmd = 0;
+ char msg[2048] = {0};
+ glusterd_volinfo_t *volinfo = NULL;
+ glusterd_brickinfo_t *brickinfo = NULL;
+ glusterd_brickinfo_t *tmp = NULL;
+ gf_boolean_t volfile_update = _gf_false;
+ char *task_id_str = NULL;
+ xlator_t *this = NULL;
+ uint32_t commit_hash;
+ int32_t is_force = 0;
+
+ this = THIS;
+ GF_ASSERT(this);
+
+ ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname);
+ if (ret) {
+ gf_msg_debug(this->name, 0, "volname not given");
+ goto out;
+ }
+
+ ret = dict_get_int32n(dict, "rebalance-command", SLEN("rebalance-command"),
+ &cmd);
+ if (ret) {
+ gf_msg_debug(this->name, 0, "command not given");
+ goto out;
+ }
+
+ ret = glusterd_rebalance_cmd_validate(cmd, volname, &volinfo, msg,
+ sizeof(msg));
+ if (ret) {
+ gf_msg_debug(this->name, 0, "cmd validate failed");
+ goto out;
+ }
+
+ switch (cmd) {
+ case GF_DEFRAG_CMD_START:
+ case GF_DEFRAG_CMD_START_LAYOUT_FIX:
+ case GF_DEFRAG_CMD_START_FORCE:
+
ret = dict_get_int32n(dict, "force", SLEN("force"), &is_force);
if (ret)
is_force = 0;
+ if (!is_force) {
+ /* Reset defrag status to 'NOT STARTED' whenever a
+ * remove-brick/rebalance command is issued to remove
+ * stale information from previous run.
+ */
+ volinfo->rebal.defrag_status = GF_DEFRAG_STATUS_NOT_STARTED;
- if (volinfo->type != GF_CLUSTER_TYPE_TIER) {
- gf_asprintf(op_errstr,
- "volume %s is not a tier "
- "volume.",
- volinfo->volname);
- ret = -1;
+ ret = dict_get_strn(dict, GF_REBALANCE_TID_KEY,
+ SLEN(GF_REBALANCE_TID_KEY), &task_id_str);
+ if (ret) {
+ gf_msg_debug(this->name, 0,
+ "Missing rebalance"
+ " id");
+ ret = 0;
+ } else {
+ gf_uuid_parse(task_id_str, volinfo->rebal.rebalance_id);
+ volinfo->rebal.op = GD_OP_REBALANCE;
+ }
+ if (!gd_should_i_start_rebalance(volinfo)) {
+ /* Store the rebalance-id and rebalance command
+ * even if the peer isn't starting a rebalance
+ * process. On peers where a rebalance process
+ * is started, glusterd_handle_defrag_start
+ * performs the storing.
+ * Storing this is needed for having
+ * 'volume status' work correctly.
+ */
+ glusterd_store_perform_node_state_store(volinfo);
+ break;
+ }
+ if (dict_get_uint32(dict, "commit-hash", &commit_hash) == 0) {
+ volinfo->rebal.commit_hash = commit_hash;
+ }
+ ret = glusterd_handle_defrag_start(volinfo, msg, sizeof(msg),
+ cmd, NULL, GD_OP_REBALANCE);
+ break;
+ } else {
+ /* Reset defrag status to 'STARTED' so that the
+ * pid is checked and restarted accordingly.
+ * If the pid is not running it executes the
+ * "NOT_STARTED" case and restarts the process
+ */
+ volinfo->rebal.defrag_status = GF_DEFRAG_STATUS_STARTED;
+ volinfo->rebal.defrag_cmd = cmd;
+ volinfo->rebal.op = GD_OP_REBALANCE;
+
+ ret = dict_get_strn(dict, GF_REBALANCE_TID_KEY,
+ SLEN(GF_REBALANCE_TID_KEY), &task_id_str);
+ if (ret) {
+ gf_msg_debug(this->name, 0,
+ "Missing rebalance"
+ " id");
+ ret = 0;
+ } else {
+ gf_uuid_parse(task_id_str, volinfo->rebal.rebalance_id);
+ volinfo->rebal.op = GD_OP_REBALANCE;
+ }
+ if (dict_get_uint32(dict, "commit-hash", &commit_hash) == 0) {
+ volinfo->rebal.commit_hash = commit_hash;
+ }
+ ret = glusterd_restart_rebalance_for_volume(volinfo);
+ break;
+ }
+ case GF_DEFRAG_CMD_STOP:
+ /* Clear task-id only on explicitly stopping rebalance.
+ * Also clear the stored operation, so it doesn't cause trouble
+ * with future rebalance/remove-brick starts
+ */
+ gf_uuid_clear(volinfo->rebal.rebalance_id);
+ volinfo->rebal.op = GD_OP_NONE;
+
+ /* Fall back to the old volume file in case of decommission*/
+ cds_list_for_each_entry_safe(brickinfo, tmp, &volinfo->bricks,
+ brick_list)
+ {
+ if (!brickinfo->decommissioned)
+ continue;
+ brickinfo->decommissioned = 0;
+ volfile_update = _gf_true;
+ }
+
+ if (volfile_update == _gf_false) {
+ ret = 0;
+ break;
+ }
+
+ ret = glusterd_create_volfiles_and_notify_services(volinfo);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0,
+ GD_MSG_VOLFILE_CREATE_FAIL, "failed to create volfiles");
goto out;
}
- if ((!is_force) && glusterd_is_tier_daemon_running(volinfo)) {
- ret = gf_asprintf(op_errstr,
- "A Tier daemon is "
- "already running on volume %s",
- volname);
- ret = -1;
+
+ ret = glusterd_store_volinfo(volinfo,
+ GLUSTERD_VOLINFO_VER_AC_INCREMENT);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_VOLINFO_SET_FAIL,
+ "failed to store volinfo");
goto out;
}
- /* Fall through */
+
+ ret = 0;
+ break;
+
+ case GF_DEFRAG_CMD_STATUS:
+ break;
+ default:
+ break;
+ }
+
+out:
+ if (ret && op_errstr && msg[0])
+ *op_errstr = gf_strdup(msg);
+
+ return ret;
+}
+
+int
+glusterd_op_stage_rebalance(dict_t *dict, char **op_errstr)
+{
+ char *volname = NULL;
+ char *cmd_str = NULL;
+ int ret = 0;
+ int32_t cmd = 0;
+ char msg[2048] = {0};
+ glusterd_volinfo_t *volinfo = NULL;
+ char *task_id_str = NULL;
+ dict_t *op_ctx = NULL;
+ xlator_t *this = 0;
+
+ this = THIS;
+ GF_ASSERT(this);
+
+ ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname);
+ if (ret) {
+ gf_msg_debug(this->name, 0, "volname not found");
+ goto out;
+ }
+
+ ret = dict_get_int32n(dict, "rebalance-command", SLEN("rebalance-command"),
+ &cmd);
+ if (ret) {
+ gf_msg_debug(this->name, 0, "cmd not found");
+ goto out;
+ }
+
+ ret = glusterd_rebalance_cmd_validate(cmd, volname, &volinfo, msg,
+ sizeof(msg));
+ if (ret) {
+ gf_msg_debug(this->name, 0, "failed to validate");
+ goto out;
+ }
+ switch (cmd) {
case GF_DEFRAG_CMD_START:
case GF_DEFRAG_CMD_START_LAYOUT_FIX:
/* Check if the connected clients are all of version
@@ -747,7 +1117,6 @@ glusterd_op_stage_rebalance(dict_t *dict, char **op_errstr)
goto out;
}
break;
- case GF_DEFRAG_CMD_STATUS_TIER:
case GF_DEFRAG_CMD_STATUS:
case GF_DEFRAG_CMD_STOP:
@@ -792,38 +1161,8 @@ glusterd_op_stage_rebalance(dict_t *dict, char **op_errstr)
goto out;
}
}
- if (cmd == GF_DEFRAG_CMD_STATUS_TIER) {
- if (volinfo->type != GF_CLUSTER_TYPE_TIER) {
- snprintf(msg, sizeof(msg),
- "volume %s is not "
- "a tier volume.",
- volinfo->volname);
- ret = -1;
- goto out;
- }
- }
-
break;
- case GF_DEFRAG_CMD_STOP_DETACH_TIER:
- case GF_DEFRAG_CMD_DETACH_STATUS:
- if (volinfo->type != GF_CLUSTER_TYPE_TIER) {
- snprintf(msg, sizeof(msg),
- "volume %s is not "
- "a tier volume.",
- volinfo->volname);
- ret = -1;
- goto out;
- }
-
- if (volinfo->rebal.op != GD_OP_REMOVE_BRICK) {
- snprintf(msg, sizeof(msg),
- "Detach-tier "
- "not started");
- ret = -1;
- goto out;
- }
- break;
default:
break;
}
@@ -879,8 +1218,7 @@ glusterd_op_rebalance(dict_t *dict, char **op_errstr, dict_t *rsp_dict)
/* Set task-id, if available, in op_ctx dict for operations other than
* start
*/
- if (cmd == GF_DEFRAG_CMD_STATUS || cmd == GF_DEFRAG_CMD_STOP ||
- cmd == GF_DEFRAG_CMD_STATUS_TIER) {
+ if (cmd == GF_DEFRAG_CMD_STATUS || cmd == GF_DEFRAG_CMD_STOP) {
if (!gf_uuid_is_null(volinfo->rebal.rebalance_id)) {
ctx = glusterd_op_get_ctx();
if (!ctx) {
@@ -910,7 +1248,6 @@ glusterd_op_rebalance(dict_t *dict, char **op_errstr, dict_t *rsp_dict)
case GF_DEFRAG_CMD_START:
case GF_DEFRAG_CMD_START_LAYOUT_FIX:
case GF_DEFRAG_CMD_START_FORCE:
- case GF_DEFRAG_CMD_START_TIER:
ret = dict_get_int32n(dict, "force", SLEN("force"), &is_force);
if (ret)
@@ -979,7 +1316,6 @@ glusterd_op_rebalance(dict_t *dict, char **op_errstr, dict_t *rsp_dict)
break;
}
case GF_DEFRAG_CMD_STOP:
- case GF_DEFRAG_CMD_STOP_DETACH_TIER:
/* Clear task-id only on explicitly stopping rebalance.
* Also clear the stored operation, so it doesn't cause trouble
* with future rebalance/remove-brick starts
@@ -1017,20 +1353,10 @@ glusterd_op_rebalance(dict_t *dict, char **op_errstr, dict_t *rsp_dict)
goto out;
}
- if (volinfo->type == GF_CLUSTER_TYPE_TIER &&
- cmd == GF_OP_CMD_STOP_DETACH_TIER) {
- glusterd_defrag_info_set(volinfo, dict,
- GF_DEFRAG_CMD_START_TIER,
- GF_DEFRAG_CMD_START, GD_OP_REBALANCE);
- glusterd_restart_rebalance_for_volume(volinfo);
- }
-
ret = 0;
break;
- case GF_DEFRAG_CMD_START_DETACH_TIER:
case GF_DEFRAG_CMD_STATUS:
- case GF_DEFRAG_CMD_STATUS_TIER:
break;
default:
break;
@@ -1068,23 +1394,11 @@ glusterd_defrag_event_notify_handle(dict_t *dict)
volname_ptr = strchr(volname_ptr, '/');
volname = volname_ptr + 1;
} else {
- volname_ptr = strstr(volname, "tierd/");
- if (volname_ptr) {
- volname_ptr = strchr(volname_ptr, '/');
- if (!volname_ptr) {
- ret = -1;
- goto out;
- }
- volname = volname_ptr + 1;
- } else {
- gf_msg(this->name, GF_LOG_ERROR, 0,
- GD_MSG_NO_REBALANCE_PFX_IN_VOLNAME,
- "volname received (%s) is not prefixed with "
- "rebalance or tierd.",
- volname);
- ret = -1;
- goto out;
- }
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_NO_REBALANCE_PFX_IN_VOLNAME,
+ "volname received (%s) is not prefixed with rebalance.",
+ volname);
+ ret = -1;
+ goto out;
}
ret = glusterd_volinfo_find(volname, &volinfo);
diff --git a/xlators/mgmt/glusterd/src/glusterd-replace-brick.c b/xlators/mgmt/glusterd/src/glusterd-replace-brick.c
index f14e79ecf5f..43c2f4373e0 100644
--- a/xlators/mgmt/glusterd/src/glusterd-replace-brick.c
+++ b/xlators/mgmt/glusterd/src/glusterd-replace-brick.c
@@ -7,10 +7,10 @@
later), or the GNU General Public License, version 2 (GPLv2), in all
cases as published by the Free Software Foundation.
*/
-#include "common-utils.h"
+#include <glusterfs/common-utils.h>
#include "cli1-xdr.h"
#include "xdr-generic.h"
-#include "glusterfs.h"
+#include <glusterfs/glusterfs.h>
#include "glusterd.h"
#include "glusterd-op-sm.h"
#include "glusterd-geo-rep.h"
@@ -18,13 +18,12 @@
#include "glusterd-utils.h"
#include "glusterd-svc-mgmt.h"
#include "glusterd-svc-helper.h"
-#include "glusterd-nfs-svc.h"
#include "glusterd-volgen.h"
#include "glusterd-messages.h"
#include "glusterd-server-quorum.h"
#include "glusterd-mgmt.h"
-#include "run.h"
-#include "syscall.h"
+#include <glusterfs/run.h>
+#include <glusterfs/syscall.h>
#include <signal.h>
@@ -228,6 +227,20 @@ glusterd_op_stage_replace_brick(dict_t *dict, char **op_errstr,
is_force = _gf_true;
}
+ if (volinfo->snap_count > 0 || !cds_list_empty(&volinfo->snap_volumes)) {
+ snprintf(msg, sizeof(msg),
+ "Volume %s has %" PRIu64
+ " snapshots. "
+ "Changing the volume configuration will not effect snapshots."
+ "But the snapshot brick mount should be intact to "
+ "make them function.",
+ volname, volinfo->snap_count);
+ gf_msg("glusterd", GF_LOG_WARNING, 0, GD_MSG_SNAP_WARN, "%s", msg);
+ msg[0] = '\0';
+ }
+
+ glusterd_add_peers_to_auth_list(volname);
+
ret = glusterd_get_dst_brick_info(&dst_brick, volname, op_errstr,
&dst_brickinfo, &host, dict,
&dup_dstbrick);
@@ -268,34 +281,37 @@ glusterd_op_stage_replace_brick(dict_t *dict, char **op_errstr,
}
if (!gf_is_local_addr(host)) {
- rcu_read_lock();
+ RCU_READ_LOCK;
peerinfo = glusterd_peerinfo_find(NULL, host);
if (peerinfo == NULL) {
+ RCU_READ_UNLOCK;
ret = -1;
snprintf(msg, sizeof(msg), "%s, is not a friend", host);
*op_errstr = gf_strdup(msg);
+ goto out;
} else if (!peerinfo->connected) {
+ RCU_READ_UNLOCK;
+ ret = -1;
snprintf(msg, sizeof(msg),
"%s, is not connected at "
"the moment",
host);
*op_errstr = gf_strdup(msg);
- ret = -1;
+ goto out;
} else if (GD_FRIEND_STATE_BEFRIENDED != peerinfo->state.state) {
+ RCU_READ_UNLOCK;
+ ret = -1;
snprintf(msg, sizeof(msg),
"%s, is not befriended "
"at the moment",
host);
*op_errstr = gf_strdup(msg);
- ret = -1;
- }
- rcu_read_unlock();
-
- if (ret)
goto out;
+ }
+ RCU_READ_UNLOCK;
} else if (priv->op_version >= GD_OP_VERSION_3_6_0) {
/* A bricks mount dir is required only by snapshots which were
diff --git a/xlators/mgmt/glusterd/src/glusterd-reset-brick.c b/xlators/mgmt/glusterd/src/glusterd-reset-brick.c
index 41adc40b5ce..e4d247a1d6c 100644
--- a/xlators/mgmt/glusterd/src/glusterd-reset-brick.c
+++ b/xlators/mgmt/glusterd/src/glusterd-reset-brick.c
@@ -7,10 +7,10 @@
later), or the GNU General Public License, version 2 (GPLv2), in all
cases as published by the Free Software Foundation.
*/
-#include "common-utils.h"
+#include <glusterfs/common-utils.h>
#include "cli1-xdr.h"
#include "xdr-generic.h"
-#include "glusterfs.h"
+#include <glusterfs/glusterfs.h>
#include "glusterd.h"
#include "glusterd-op-sm.h"
#include "glusterd-geo-rep.h"
@@ -18,12 +18,11 @@
#include "glusterd-utils.h"
#include "glusterd-svc-mgmt.h"
#include "glusterd-svc-helper.h"
-#include "glusterd-nfs-svc.h"
#include "glusterd-volgen.h"
#include "glusterd-messages.h"
#include "glusterd-mgmt.h"
-#include "run.h"
-#include "syscall.h"
+#include <glusterfs/run.h>
+#include <glusterfs/syscall.h>
#include <signal.h>
@@ -153,35 +152,38 @@ glusterd_reset_brick_prevalidate(dict_t *dict, char **op_errstr,
if (ret)
goto out;
} else {
- rcu_read_lock();
+ RCU_READ_LOCK;
peerinfo = glusterd_peerinfo_find(NULL, host);
if (peerinfo == NULL) {
+ RCU_READ_UNLOCK;
ret = -1;
snprintf(msg, sizeof(msg), "%s, is not a friend.", host);
*op_errstr = gf_strdup(msg);
+ goto out;
} else if (!peerinfo->connected) {
+ RCU_READ_UNLOCK;
+ ret = -1;
snprintf(msg, sizeof(msg),
"%s,"
"is not connected at "
"the moment.",
host);
*op_errstr = gf_strdup(msg);
- ret = -1;
+ goto out;
} else if (GD_FRIEND_STATE_BEFRIENDED != peerinfo->state.state) {
+ RCU_READ_UNLOCK;
+ ret = -1;
snprintf(msg, sizeof(msg),
"%s, is not befriended "
"at the moment.",
host);
*op_errstr = gf_strdup(msg);
- ret = -1;
- }
- rcu_read_unlock();
-
- if (ret)
goto out;
+ }
+ RCU_READ_UNLOCK;
}
if (!(gf_uuid_compare(dst_brickinfo->uuid, MY_UUID))) {
diff --git a/xlators/mgmt/glusterd/src/glusterd-rpc-ops.c b/xlators/mgmt/glusterd/src/glusterd-rpc-ops.c
index 40e22deff9b..88662e3bbae 100644
--- a/xlators/mgmt/glusterd/src/glusterd-rpc-ops.c
+++ b/xlators/mgmt/glusterd/src/glusterd-rpc-ops.c
@@ -14,19 +14,25 @@
#include "xdr-generic.h"
-#include "compat-errno.h"
+#include <glusterfs/compat-errno.h>
#include "glusterd-op-sm.h"
#include "glusterd-sm.h"
#include "glusterd.h"
#include "protocol-common.h"
#include "glusterd-utils.h"
-#include "common-utils.h"
+#include <glusterfs/common-utils.h>
#include "glusterd-messages.h"
#include "glusterd-snapshot-utils.h"
#include <sys/uio.h>
#define SERVER_PATH_MAX (16 * 1024)
+/* Clear the frame's local pointer, then destroy the call stack rooted at
+ * frame->root. The argument is parenthesized so that any pointer-valued
+ * expression can be passed; note that it is evaluated twice.
+ * NOTE(review): local is presumably cleared so STACK_DESTROY does not release
+ * memory owned elsewhere - confirm against STACK_DESTROY. */
+#define GLUSTERD_STACK_DESTROY(frame)                                          \
+    do {                                                                       \
+        (frame)->local = NULL;                                                 \
+        STACK_DESTROY((frame)->root);                                          \
+    } while (0)
+
extern glusterd_op_info_t opinfo;
extern uuid_t global_txn_id;
@@ -58,8 +64,6 @@ glusterd_op_send_cli_response(glusterd_op_t op, int32_t op_ret,
ctx = op_ctx;
switch (op) {
- case GD_OP_DETACH_TIER:
- case GD_OP_REMOVE_TIER_BRICK:
case GD_OP_REMOVE_BRICK: {
if (ctx)
ret = dict_get_strn(ctx, "errstr", SLEN("errstr"), &errstr);
@@ -70,9 +74,6 @@ glusterd_op_send_cli_response(glusterd_op_t op, int32_t op_ret,
errstr = "Error while resetting options";
break;
}
- case GD_OP_TIER_MIGRATE:
- case GD_OP_TIER_STATUS:
- case GD_OP_DETACH_TIER_STATUS:
case GD_OP_REBALANCE:
case GD_OP_DEFRAG_BRICK_VOLUME: {
if (ctx) {
@@ -138,9 +139,14 @@ glusterd_op_send_cli_response(glusterd_op_t op, int32_t op_ret,
case GD_OP_SCRUB_ONDEMAND:
case GD_OP_RESET_BRICK:
case GD_OP_MAX_OPVERSION:
- case GD_OP_TIER_START_STOP:
case GD_OP_DETACH_NOT_STARTED:
case GD_OP_GANESHA:
+ case GD_OP_DETACH_TIER:
+ case GD_OP_TIER_MIGRATE:
+ case GD_OP_TIER_START_STOP:
+ case GD_OP_TIER_STATUS:
+ case GD_OP_DETACH_TIER_STATUS:
+ case GD_OP_REMOVE_TIER_BRICK:
case GD_OP_ADD_TIER_BRICK:
{
@@ -177,10 +183,8 @@ glusterd_op_send_cli_response(glusterd_op_t op, int32_t op_ret,
ret = dict_allocate_and_serialize(ctx, &rsp.dict.dict_val,
&rsp.dict.dict_len);
if (ret < 0)
- gf_msg(this->name, GF_LOG_ERROR, 0,
- GD_MSG_DICT_SERL_LENGTH_GET_FAIL,
- "failed to "
- "serialize buffer");
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ GD_MSG_DICT_ALLOC_AND_SERL_LENGTH_GET_FAIL, NULL);
else
free_ptr = rsp.dict.dict_val;
}
@@ -266,14 +270,15 @@ __glusterd_probe_cbk(struct rpc_req *req, struct iovec *iov, int count,
goto out;
}
- rcu_read_lock();
+ RCU_READ_LOCK;
peerinfo = glusterd_peerinfo_find(rsp.uuid, rsp.hostname);
if (peerinfo == NULL) {
+ RCU_READ_UNLOCK
ret = -1;
gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_PEER_NOT_FOUND,
"Could not find peerd %s(%s)", rsp.hostname,
uuid_utoa(rsp.uuid));
- goto unlock;
+ goto out;
}
/*
@@ -387,9 +392,10 @@ cont:
ret = glusterd_friend_sm_new_event(GD_FRIEND_EVENT_INIT_FRIEND_REQ, &event);
if (ret) {
+ RCU_READ_UNLOCK;
gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_NEW_FRIEND_SM_EVENT_GET_FAIL,
"Unable to get event");
- goto unlock;
+ goto out;
}
event->peername = gf_strdup(peerinfo->hostname);
@@ -403,7 +409,7 @@ cont:
"Received resp to probe req");
unlock:
- rcu_read_unlock();
+ RCU_READ_UNLOCK;
out:
free(rsp.hostname); // malloced by xdr
@@ -467,16 +473,17 @@ __glusterd_friend_add_cbk(struct rpc_req *req, struct iovec *iov, int count,
(op_ret) ? "RJT" : "ACC", uuid_utoa(rsp.uuid), rsp.hostname,
rsp.port);
- rcu_read_lock();
+ RCU_READ_LOCK;
peerinfo = glusterd_peerinfo_find(rsp.uuid, rsp.hostname);
if (peerinfo == NULL) {
+ RCU_READ_UNLOCK
ret = -1;
gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_RESP_FROM_UNKNOWN_PEER,
"received friend add response from"
" unknown peer uuid: %s",
uuid_utoa(rsp.uuid));
- goto unlock;
+ goto out;
}
if (op_ret)
@@ -507,7 +514,7 @@ __glusterd_friend_add_cbk(struct rpc_req *req, struct iovec *iov, int count,
ret = glusterd_friend_sm_inject_event(event);
unlock:
- rcu_read_unlock();
+ RCU_READ_UNLOCK;
out:
ctx = ((call_frame_t *)myframe)->local;
((call_frame_t *)myframe)->local = NULL;
@@ -589,7 +596,7 @@ __glusterd_friend_remove_cbk(struct rpc_req *req, struct iovec *iov, int count,
rsp.port);
inject:
- rcu_read_lock();
+ RCU_READ_LOCK;
peerinfo = glusterd_peerinfo_find(rsp.uuid, ctx->hostname);
if (peerinfo == NULL) {
@@ -622,7 +629,7 @@ inject:
op_ret = 0;
unlock:
- rcu_read_unlock();
+ RCU_READ_UNLOCK;
respond:
ret = glusterd_xfer_cli_deprobe_resp(ctx->req, op_ret, op_errno, NULL,
@@ -748,9 +755,9 @@ __glusterd_cluster_lock_cbk(struct rpc_req *req, struct iovec *iov, int count,
uuid_utoa(rsp.uuid));
}
- rcu_read_lock();
+ RCU_READ_LOCK;
ret = (glusterd_peerinfo_find(rsp.uuid, NULL) == NULL);
- rcu_read_unlock();
+ RCU_READ_UNLOCK;
if (ret) {
gf_msg(this->name, GF_LOG_CRITICAL, 0, GD_MSG_RESP_FROM_UNKNOWN_PEER,
@@ -865,9 +872,9 @@ glusterd_mgmt_v3_lock_peers_cbk_fn(struct rpc_req *req, struct iovec *iov,
uuid_utoa(rsp.uuid));
}
- rcu_read_lock();
+ RCU_READ_LOCK;
ret = (glusterd_peerinfo_find(rsp.uuid, NULL) == NULL);
- rcu_read_unlock();
+ RCU_READ_UNLOCK;
if (ret) {
gf_msg(this->name, GF_LOG_CRITICAL, 0, GD_MSG_RESP_FROM_UNKNOWN_PEER,
@@ -973,9 +980,9 @@ glusterd_mgmt_v3_unlock_peers_cbk_fn(struct rpc_req *req, struct iovec *iov,
uuid_utoa(rsp.uuid));
}
- rcu_read_lock();
+ RCU_READ_LOCK;
ret = (glusterd_peerinfo_find(rsp.uuid, NULL) == NULL);
- rcu_read_unlock();
+ RCU_READ_UNLOCK;
if (ret) {
gf_msg(this->name, GF_LOG_CRITICAL, 0, GD_MSG_CLUSTER_UNLOCK_FAILED,
@@ -1079,9 +1086,9 @@ __glusterd_cluster_unlock_cbk(struct rpc_req *req, struct iovec *iov, int count,
uuid_utoa(rsp.uuid));
}
- rcu_read_lock();
+ RCU_READ_LOCK;
ret = (glusterd_peerinfo_find(rsp.uuid, NULL) == NULL);
- rcu_read_unlock();
+ RCU_READ_UNLOCK;
if (ret) {
gf_msg(this->name, GF_LOG_CRITICAL, 0, GD_MSG_CLUSTER_UNLOCK_FAILED,
@@ -1203,7 +1210,7 @@ out:
uuid_utoa(rsp.uuid));
}
- rcu_read_lock();
+ RCU_READ_LOCK;
peerinfo = glusterd_peerinfo_find(rsp.uuid, NULL);
if (peerinfo == NULL) {
gf_msg(this->name, GF_LOG_CRITICAL, 0, GD_MSG_RESP_FROM_UNKNOWN_PEER,
@@ -1230,7 +1237,7 @@ out:
event_type = GD_OP_EVENT_RCVD_ACC;
}
- rcu_read_unlock();
+ RCU_READ_UNLOCK;
ret = glusterd_set_txn_opinfo(txn_id, &opinfo);
if (ret)
@@ -1357,7 +1364,7 @@ __glusterd_commit_op_cbk(struct rpc_req *req, struct iovec *iov, int count,
uuid_utoa(*txn_id));
}
- rcu_read_lock();
+ RCU_READ_LOCK;
peerinfo = glusterd_peerinfo_find(rsp.uuid, NULL);
if (peerinfo == NULL) {
gf_msg(this->name, GF_LOG_CRITICAL, 0, GD_MSG_RESP_FROM_UNKNOWN_PEER,
@@ -1408,7 +1415,7 @@ __glusterd_commit_op_cbk(struct rpc_req *req, struct iovec *iov, int count,
}
}
unlock:
- rcu_read_unlock();
+ RCU_READ_UNLOCK;
out:
@@ -1455,6 +1462,7 @@ glusterd_rpc_probe(call_frame_t *frame, xlator_t *this, void *data)
dict_t *dict = NULL;
if (!frame || !this || !data) {
+ gf_smsg(THIS->name, GF_LOG_ERROR, errno, GD_MSG_INVALID_ARGUMENT, NULL);
ret = -1;
goto out;
}
@@ -1464,15 +1472,24 @@ glusterd_rpc_probe(call_frame_t *frame, xlator_t *this, void *data)
GF_ASSERT(priv);
ret = dict_get_strn(dict, "hostname", SLEN("hostname"), &hostname);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
+ "Key=hostname", NULL);
goto out;
+ }
ret = dict_get_int32n(dict, "port", SLEN("port"), &port);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_DEBUG, errno, GD_MSG_DICT_GET_FAILED,
+ "Key=port", NULL);
port = GF_DEFAULT_BASE_PORT;
+ }
ret = dict_get_ptr(dict, "peerinfo", VOID(&peerinfo));
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
+ "Key=peerinfo", NULL);
goto out;
+ }
gf_uuid_copy(req.uuid, MY_UUID);
req.hostname = gf_strdup(hostname);
@@ -1501,6 +1518,7 @@ glusterd_rpc_friend_add(call_frame_t *frame, xlator_t *this, void *data)
dict_t *peer_data = NULL;
if (!frame || !this || !data) {
+ gf_smsg(THIS->name, GF_LOG_ERROR, errno, GD_MSG_INVALID_ARGUMENT, NULL);
ret = -1;
goto out;
}
@@ -1510,11 +1528,11 @@ glusterd_rpc_friend_add(call_frame_t *frame, xlator_t *this, void *data)
GF_ASSERT(priv);
- rcu_read_lock();
+ RCU_READ_LOCK;
peerinfo = glusterd_peerinfo_find(event->peerid, event->peername);
if (!peerinfo) {
- rcu_read_unlock();
+ RCU_READ_UNLOCK;
ret = -1;
gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_PEER_NOT_FOUND,
"Could not find peer %s(%s)", event->peername,
@@ -1522,17 +1540,18 @@ glusterd_rpc_friend_add(call_frame_t *frame, xlator_t *this, void *data)
goto out;
}
- gf_uuid_copy(req.uuid, MY_UUID);
req.hostname = gf_strdup(peerinfo->hostname);
req.port = peerinfo->port;
- rcu_read_unlock();
+ RCU_READ_UNLOCK;
- ret = glusterd_add_volumes_to_export_dict(&peer_data);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
- "Unable to add list of volumes "
- "in the peer_data dict for handshake");
+ gf_uuid_copy(req.uuid, MY_UUID);
+
+ peer_data = dict_new();
+ if (!peer_data) {
+ gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, GD_MSG_DICT_CREATE_FAIL,
+ NULL);
+ errno = ENOMEM;
goto out;
}
@@ -1563,10 +1582,26 @@ glusterd_rpc_friend_add(call_frame_t *frame, xlator_t *this, void *data)
}
}
- ret = dict_allocate_and_serialize(peer_data, &req.vols.vols_val,
- &req.vols.vols_len);
- if (ret)
+ /* Do not add any key-value pair to the peer_data dictionary after calling
+ * this function. */
+ ret = glusterd_add_volumes_to_export_dict(peer_data, &req.vols.vols_val,
+ &req.vols.vols_len);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Unable to add list of volumes "
+ "in the peer_data dict for handshake");
goto out;
+ }
+
+ if (!req.vols.vols_len) {
+ ret = dict_allocate_and_serialize(peer_data, &req.vols.vols_val,
+ &req.vols.vols_len);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ GD_MSG_DICT_ALLOC_AND_SERL_LENGTH_GET_FAIL, NULL);
+ goto out;
+ }
+ }
ret = glusterd_submit_request(
peerinfo->rpc, &req, frame, peerinfo->peer, GLUSTERD_FRIEND_ADD, NULL,
@@ -1604,11 +1639,11 @@ glusterd_rpc_friend_remove(call_frame_t *frame, xlator_t *this, void *data)
GF_ASSERT(priv);
- rcu_read_lock();
+ RCU_READ_LOCK;
peerinfo = glusterd_peerinfo_find(event->peerid, event->peername);
if (!peerinfo) {
- rcu_read_unlock();
+ RCU_READ_UNLOCK;
ret = -1;
gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_PEER_NOT_FOUND,
"Could not find peer %s(%s)", event->peername,
@@ -1625,7 +1660,7 @@ glusterd_rpc_friend_remove(call_frame_t *frame, xlator_t *this, void *data)
glusterd_friend_remove_cbk,
(xdrproc_t)xdr_gd1_mgmt_friend_req);
- rcu_read_unlock();
+ RCU_READ_UNLOCK;
out:
GF_FREE(req.hostname);
@@ -1739,8 +1774,11 @@ glusterd_mgmt_v3_lock_peers(call_frame_t *frame, xlator_t *this, void *data)
GF_ASSERT(priv);
ret = dict_get_ptr(dict, "peerinfo", VOID(&peerinfo));
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
+ "Key=peerinfo", NULL);
goto out;
+ }
// peerinfo should not be in payload
dict_deln(dict, "peerinfo", SLEN("peerinfo"));
@@ -1750,9 +1788,8 @@ glusterd_mgmt_v3_lock_peers(call_frame_t *frame, xlator_t *this, void *data)
ret = dict_allocate_and_serialize(dict, &req.dict.dict_val,
&req.dict.dict_len);
if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SERL_LENGTH_GET_FAIL,
- "Failed to serialize dict "
- "to request buffer");
+ gf_smsg(this->name, GF_LOG_ERROR, 0,
+ GD_MSG_DICT_ALLOC_AND_SERL_LENGTH_GET_FAIL, NULL);
goto out;
}
@@ -1776,6 +1813,7 @@ glusterd_mgmt_v3_lock_peers(call_frame_t *frame, xlator_t *this, void *data)
}
frame->cookie = GF_MALLOC(sizeof(uuid_t), gf_common_mt_uuid_t);
if (!frame->cookie) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_NO_MEMORY, NULL);
ret = -1;
goto out;
}
@@ -1815,8 +1853,11 @@ glusterd_mgmt_v3_unlock_peers(call_frame_t *frame, xlator_t *this, void *data)
GF_ASSERT(priv);
ret = dict_get_ptr(dict, "peerinfo", VOID(&peerinfo));
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
+ "Key=peerinfo", NULL);
goto out;
+ }
// peerinfo should not be in payload
dict_deln(dict, "peerinfo", SLEN("peerinfo"));
@@ -1826,9 +1867,8 @@ glusterd_mgmt_v3_unlock_peers(call_frame_t *frame, xlator_t *this, void *data)
ret = dict_allocate_and_serialize(dict, &req.dict.dict_val,
&req.dict.dict_len);
if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SERL_LENGTH_GET_FAIL,
- "Failed to serialize dict "
- "to request buffer");
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ GD_MSG_DICT_ALLOC_AND_SERL_LENGTH_GET_FAIL, NULL);
goto out;
}
@@ -1852,6 +1892,7 @@ glusterd_mgmt_v3_unlock_peers(call_frame_t *frame, xlator_t *this, void *data)
}
frame->cookie = GF_MALLOC(sizeof(uuid_t), gf_common_mt_uuid_t);
if (!frame->cookie) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_NO_MEMORY, NULL);
ret = -1;
goto out;
}
@@ -1921,7 +1962,6 @@ glusterd_stage_op(call_frame_t *frame, xlator_t *this, void *data)
glusterd_peerinfo_t *peerinfo = NULL;
glusterd_conf_t *priv = NULL;
dict_t *dict = NULL;
- gf_boolean_t is_alloc = _gf_true;
uuid_t *txn_id = NULL;
if (!this) {
@@ -1934,8 +1974,11 @@ glusterd_stage_op(call_frame_t *frame, xlator_t *this, void *data)
GF_ASSERT(priv);
ret = dict_get_ptr(dict, "peerinfo", VOID(&peerinfo));
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
+ "Key=peerinfo", NULL);
goto out;
+ }
// peerinfo should not be in payload
dict_deln(dict, "peerinfo", SLEN("peerinfo"));
@@ -1945,9 +1988,8 @@ glusterd_stage_op(call_frame_t *frame, xlator_t *this, void *data)
ret = dict_allocate_and_serialize(dict, &req.buf.buf_val, &req.buf.buf_len);
if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SERL_LENGTH_GET_FAIL,
- "Failed to serialize dict "
- "to request buffer");
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ GD_MSG_DICT_ALLOC_AND_SERL_LENGTH_GET_FAIL, NULL);
goto out;
}
/* Sending valid transaction ID to peers */
@@ -1969,6 +2011,7 @@ glusterd_stage_op(call_frame_t *frame, xlator_t *this, void *data)
}
frame->cookie = GF_MALLOC(sizeof(uuid_t), gf_common_mt_uuid_t);
if (!frame->cookie) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_NO_MEMORY, NULL);
ret = -1;
goto out;
}
@@ -1980,7 +2023,7 @@ glusterd_stage_op(call_frame_t *frame, xlator_t *this, void *data)
(xdrproc_t)xdr_gd1_mgmt_stage_op_req);
out:
- if ((_gf_true == is_alloc) && req.buf.buf_val)
+ if (req.buf.buf_val)
GF_FREE(req.buf.buf_val);
gf_msg_debug(this ? this->name : "glusterd", 0, "Returning %d", ret);
@@ -1999,7 +2042,6 @@ glusterd_commit_op(call_frame_t *frame, xlator_t *this, void *data)
glusterd_peerinfo_t *peerinfo = NULL;
glusterd_conf_t *priv = NULL;
dict_t *dict = NULL;
- gf_boolean_t is_alloc = _gf_true;
uuid_t *txn_id = NULL;
if (!this) {
@@ -2011,8 +2053,11 @@ glusterd_commit_op(call_frame_t *frame, xlator_t *this, void *data)
GF_ASSERT(priv);
ret = dict_get_ptr(dict, "peerinfo", VOID(&peerinfo));
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
+ "Key=peerinfo", NULL);
goto out;
+ }
// peerinfo should not be in payload
dict_deln(dict, "peerinfo", SLEN("peerinfo"));
@@ -2022,9 +2067,8 @@ glusterd_commit_op(call_frame_t *frame, xlator_t *this, void *data)
ret = dict_allocate_and_serialize(dict, &req.buf.buf_val, &req.buf.buf_len);
if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SERL_LENGTH_GET_FAIL,
- "Failed to serialize dict to "
- "request buffer");
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ GD_MSG_DICT_ALLOC_AND_SERL_LENGTH_GET_FAIL, NULL);
goto out;
}
/* Sending valid transaction ID to peers */
@@ -2046,6 +2090,7 @@ glusterd_commit_op(call_frame_t *frame, xlator_t *this, void *data)
}
frame->cookie = GF_MALLOC(sizeof(uuid_t), gf_common_mt_uuid_t);
if (!frame->cookie) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_NO_MEMORY, NULL);
ret = -1;
goto out;
}
@@ -2057,7 +2102,7 @@ glusterd_commit_op(call_frame_t *frame, xlator_t *this, void *data)
(xdrproc_t)xdr_gd1_mgmt_commit_op_req);
out:
- if ((_gf_true == is_alloc) && req.buf.buf_val)
+ if (req.buf.buf_val)
GF_FREE(req.buf.buf_val);
gf_msg_debug(this ? this->name : "glusterd", 0, "Returning %d", ret);
@@ -2279,8 +2324,7 @@ glusterd_brick_op(call_frame_t *frame, xlator_t *this, void *data)
rpc = glusterd_pending_node_get_rpc(pending_node);
if (!rpc) {
- if (pending_node->type == GD_NODE_REBALANCE ||
- pending_node->type == GD_NODE_TIERD) {
+ if (pending_node->type == GD_NODE_REBALANCE) {
opinfo.brick_pending_count = 0;
ret = 0;
GF_FREE(req->input.input_val);
diff --git a/xlators/mgmt/glusterd/src/glusterd-scrub-svc.c b/xlators/mgmt/glusterd/src/glusterd-scrub-svc.c
index f7c23ce57c3..c49a0eefba5 100644
--- a/xlators/mgmt/glusterd/src/glusterd-scrub-svc.c
+++ b/xlators/mgmt/glusterd/src/glusterd-scrub-svc.c
@@ -8,8 +8,8 @@
cases as published by the Free Software Foundation.
*/
-#include "globals.h"
-#include "run.h"
+#include <glusterfs/globals.h>
+#include <glusterfs/run.h>
#include "glusterd.h"
#include "glusterd-utils.h"
#include "glusterd-volgen.h"
@@ -117,8 +117,10 @@ glusterd_scrubsvc_start(glusterd_svc_t *svc, int flags)
dict_t *cmdict = NULL;
cmdict = dict_new();
- if (!cmdict)
+ if (!cmdict) {
+ gf_smsg(THIS->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL);
goto error_return;
+ }
ret = dict_set_str(cmdict, "cmdarg0", "--global-timer-wheel");
if (ret)
diff --git a/xlators/mgmt/glusterd/src/glusterd-server-quorum.c b/xlators/mgmt/glusterd/src/glusterd-server-quorum.c
index cfa0cce0aba..b0b8a2e4018 100644
--- a/xlators/mgmt/glusterd/src/glusterd-server-quorum.c
+++ b/xlators/mgmt/glusterd/src/glusterd-server-quorum.c
@@ -7,7 +7,7 @@
later), or the GNU General Public License, version 2 (GPLv2), in all
cases as published by the Free Software Foundation.
*/
-#include "common-utils.h"
+#include <glusterfs/common-utils.h>
#include "glusterd.h"
#include "glusterd-utils.h"
#include "glusterd-messages.h"
@@ -89,12 +89,15 @@ glusterd_validate_quorum(xlator_t *this, glusterd_op_t op, dict_t *dict,
ret = dict_get_str(dict, "volname", &volname);
if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
+ "Key=volname", NULL);
ret = 0;
goto out;
}
ret = glusterd_volinfo_find(volname, &volinfo);
if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_VOLINFO_GET_FAIL, NULL);
ret = 0;
goto out;
}
@@ -217,7 +220,7 @@ glusterd_get_quorum_cluster_counts(xlator_t *this, int *active_count,
if (active_count)
*active_count = 1;
- rcu_read_lock();
+ RCU_READ_LOCK;
cds_list_for_each_entry_rcu(peerinfo, &conf->peers, uuid_list)
{
if (_is_contributing_to_quorum(peerinfo->quorum_contrib))
@@ -225,7 +228,7 @@ glusterd_get_quorum_cluster_counts(xlator_t *this, int *active_count,
if (active_count && (peerinfo->quorum_contrib == QUORUM_UP))
*active_count = *active_count + 1;
}
- rcu_read_unlock();
+ RCU_READ_UNLOCK;
ret = dict_get_str(conf->opts, GLUSTERD_QUORUM_RATIO_KEY, &val);
if (ret == 0) {
@@ -252,8 +255,11 @@ glusterd_is_volume_in_server_quorum(glusterd_volinfo_t *volinfo)
int ret = 0;
ret = dict_get_str(volinfo->dict, GLUSTERD_QUORUM_TYPE_KEY, &quorum_type);
- if (ret)
+ if (ret) {
+ gf_smsg(THIS->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
+ "Key=%s", GLUSTERD_QUORUM_TYPE_KEY, NULL);
goto out;
+ }
if (strcmp(quorum_type, GLUSTERD_SERVER_QUORUM) == 0)
res = _gf_true;
@@ -287,8 +293,11 @@ does_gd_meet_server_quorum(xlator_t *this)
ret = glusterd_get_quorum_cluster_counts(this, &active_count,
&quorum_count);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ GD_MSG_QUORUM_CLUSTER_COUNT_GET_FAIL, NULL);
goto out;
+ }
if (!does_quorum_meet(active_count, quorum_count)) {
goto out;
@@ -372,6 +381,7 @@ glusterd_do_volume_quorum_action(xlator_t *this, glusterd_volinfo_t *volinfo,
if (!brickinfo->start_triggered) {
pthread_mutex_lock(&brickinfo->restart_mutex);
{
+ /* coverity[SLEEP] */
ret = glusterd_brick_start(volinfo, brickinfo, _gf_false,
_gf_false);
}
diff --git a/xlators/mgmt/glusterd/src/glusterd-shd-svc-helper.c b/xlators/mgmt/glusterd/src/glusterd-shd-svc-helper.c
new file mode 100644
index 00000000000..5661e391a9c
--- /dev/null
+++ b/xlators/mgmt/glusterd/src/glusterd-shd-svc-helper.c
@@ -0,0 +1,153 @@
+/*
+ Copyright (c) 2016 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "glusterd.h"
+#include "glusterd-utils.h"
+#include "glusterd-shd-svc-helper.h"
+#include "glusterd-messages.h"
+#include "glusterd-volgen.h"
+
+/*
+ * Build the socket file path of the self-heal daemon for @volinfo.
+ *
+ * The intermediate name "<shd rundir>/run-<local uuid>" is formatted into a
+ * PATH_MAX scratch buffer and passed to glusterd_set_socket_filepath(), which
+ * receives @path and @path_len as its output arguments. If formatting fails
+ * or truncates, an empty string is passed instead. Nothing is done when
+ * THIS->private is not set.
+ */
+void
+glusterd_svc_build_shd_socket_filepath(glusterd_volinfo_t *volinfo, char *path,
+                                       int path_len)
+{
+    glusterd_conf_t *priv = THIS->private;
+    char shd_rundir[PATH_MAX] = "";
+    char sock_src[PATH_MAX] = "";
+    int32_t written = 0;
+
+    if (priv == NULL)
+        return;
+
+    GLUSTERD_GET_SHD_RUNDIR(shd_rundir, volinfo, priv);
+
+    written = snprintf(sock_src, sizeof(sock_src), "%s/run-%s", shd_rundir,
+                       uuid_utoa(MY_UUID));
+    /* Formatting error or truncation: fall back to an empty source path. */
+    if (written < 0 || written >= sizeof(sock_src))
+        sock_src[0] = 0;
+
+    glusterd_set_socket_filepath(sock_src, path, path_len);
+}
+
+/*
+ * Format the pid-file path of @volinfo's self-heal daemon into @path as
+ * "<shd rundir>/<volname>-shd.pid", writing at most @path_len bytes.
+ * @path is left untouched when THIS->private is not set.
+ */
+void
+glusterd_svc_build_shd_pidfile(glusterd_volinfo_t *volinfo, char *path,
+                               int path_len)
+{
+    glusterd_conf_t *priv = THIS->private;
+    char shd_rundir[PATH_MAX] = "";
+
+    if (priv != NULL) {
+        GLUSTERD_GET_SHD_RUNDIR(shd_rundir, volinfo, priv);
+        snprintf(path, path_len, "%s/%s-shd.pid", shd_rundir,
+                 volinfo->volname);
+    }
+}
+
+/*
+ * Format the volfile path of @volinfo's self-heal daemon into @path as
+ * "<volume dir>/<volname>-shd.vol", writing at most @path_len bytes.
+ * @path is left untouched when THIS->private is not set.
+ */
+void
+glusterd_svc_build_shd_volfile_path(glusterd_volinfo_t *volinfo, char *path,
+                                    int path_len)
+{
+    glusterd_conf_t *priv = THIS->private;
+    char volume_dir[PATH_MAX] = "";
+
+    if (priv != NULL) {
+        GLUSTERD_GET_VOLUME_DIR(volume_dir, volinfo, priv);
+        snprintf(path, path_len, "%s/%s-shd.vol", volume_dir,
+                 volinfo->volname);
+    }
+}
+
+/*
+ * Detach the self-heal daemon service @shd from the process it is attached to.
+ *
+ * Marks @shd as not attached and drops the service's connection rpc
+ * reference. Then, under conf->attach_lock, clears the service's svc_proc
+ * pointer and inited flag, removes it from the mux_svc list and unlinks its
+ * pidfile. If the process no longer has any services, the process is removed
+ * from the svc_proc list and its rpc reference is dropped after the lock is
+ * released.
+ *
+ * Does nothing when THIS->private is not set or @shd is NULL.
+ */
+void
+glusterd_shd_svcproc_cleanup(glusterd_shdsvc_t *shd)
+{
+    glusterd_svc_proc_t *svc_proc = NULL;
+    glusterd_svc_t *svc = NULL;
+    glusterd_conf_t *conf = NULL;
+    gf_boolean_t need_unref = _gf_false;
+    rpc_clnt_t *rpc = NULL;
+
+    /* conf is checked once here; a second validation would be dead code. */
+    conf = THIS->private;
+    if (!conf)
+        return;
+
+    GF_VALIDATE_OR_GOTO(THIS->name, shd, out);
+
+    svc = &shd->svc;
+    shd->attached = _gf_false;
+
+    if (svc->conn.rpc) {
+        rpc_clnt_unref(svc->conn.rpc);
+        svc->conn.rpc = NULL;
+    }
+
+    pthread_mutex_lock(&conf->attach_lock);
+    {
+        svc_proc = svc->svc_proc;
+        svc->svc_proc = NULL;
+        svc->inited = _gf_false;
+        cds_list_del_init(&svc->mux_svc);
+        glusterd_unlink_file(svc->proc.pidfile);
+
+        if (svc_proc && cds_list_empty(&svc_proc->svcs)) {
+            cds_list_del_init(&svc_proc->svc_proc_list);
+            /* We cannot free svc_proc from here: if there are pending
+             * events on the rpc, they will try to access the
+             * corresponding svc_proc. So only the rpc is unreffed here,
+             * and the memory is cleaned up from the notify function
+             * upon RPC_CLNT_DESTROY.
+             */
+            need_unref = _gf_true;
+            rpc = svc_proc->rpc;
+            svc_proc->rpc = NULL;
+        }
+    }
+    pthread_mutex_unlock(&conf->attach_lock);
+    /* rpc unref has to be performed outside the lock */
+    if (need_unref && rpc)
+        rpc_clnt_unref(rpc);
+out:
+    return;
+}
+
+/*
+ * Store the pidfile path of @volinfo's self-heal daemon service in @dict
+ * under the key "pidfile".
+ *
+ * Returns 0 on success, -1 if THIS, @volinfo or @dict is NULL, or the
+ * non-zero result of dict_set_dynstr_with_alloc() when setting the key fails
+ * (which is also logged).
+ */
+int
+glusterd_svc_set_shd_pidfile(glusterd_volinfo_t *volinfo, dict_t *dict)
+{
+    xlator_t *this = THIS;
+    glusterd_svc_t *shd_svc = NULL;
+    int ret = -1;
+
+    GF_VALIDATE_OR_GOTO("glusterd", this, out);
+    GF_VALIDATE_OR_GOTO(this->name, volinfo, out);
+    GF_VALIDATE_OR_GOTO(this->name, dict, out);
+
+    shd_svc = &(volinfo->shd.svc);
+
+    ret = dict_set_dynstr_with_alloc(dict, "pidfile", shd_svc->proc.pidfile);
+    if (ret) {
+        gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+               "Failed to set pidfile %s in dict", shd_svc->proc.pidfile);
+    }
+out:
+    return ret;
+}
diff --git a/xlators/mgmt/glusterd/src/glusterd-shd-svc-helper.h b/xlators/mgmt/glusterd/src/glusterd-shd-svc-helper.h
new file mode 100644
index 00000000000..1f0984ba857
--- /dev/null
+++ b/xlators/mgmt/glusterd/src/glusterd-shd-svc-helper.h
@@ -0,0 +1,42 @@
+/*
+ Copyright (c) 2016 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _GLUSTERD_SHD_SVC_HELPER_H_
+#define _GLUSTERD_SHD_SVC_HELPER_H_
+
+#include "glusterd.h"
+#include "glusterd-svc-mgmt.h"
+
+void
+glusterd_svc_build_shd_socket_filepath(glusterd_volinfo_t *volinfo, char *path,
+ int path_len);
+
+void
+glusterd_svc_build_shd_pidfile(glusterd_volinfo_t *volinfo, char *path,
+ int path_len);
+
+void
+glusterd_svc_build_shd_volfile_path(glusterd_volinfo_t *volinfo, char *path,
+ int path_len);
+
+void
+glusterd_shd_svcproc_cleanup(glusterd_shdsvc_t *shd);
+
+int
+glusterd_recover_shd_attach_failure(glusterd_volinfo_t *volinfo,
+ glusterd_svc_t *svc, int flags);
+
+int
+glusterd_shdsvc_create_volfile(glusterd_volinfo_t *volinfo);
+
+int
+glusterd_svc_set_shd_pidfile(glusterd_volinfo_t *volinfo, dict_t *dict);
+
+#endif
diff --git a/xlators/mgmt/glusterd/src/glusterd-shd-svc.c b/xlators/mgmt/glusterd/src/glusterd-shd-svc.c
index a096649fb44..1c56384a14b 100644
--- a/xlators/mgmt/glusterd/src/glusterd-shd-svc.c
+++ b/xlators/mgmt/glusterd/src/glusterd-shd-svc.c
@@ -8,14 +8,15 @@
cases as published by the Free Software Foundation.
*/
-#include "globals.h"
-#include "run.h"
+#include <glusterfs/globals.h>
+#include <glusterfs/run.h>
#include "glusterd.h"
#include "glusterd-utils.h"
#include "glusterd-volgen.h"
-#include "glusterd-svc-mgmt.h"
#include "glusterd-shd-svc.h"
+#include "glusterd-shd-svc-helper.h"
#include "glusterd-svc-helper.h"
+#include "glusterd-store.h"
#define GD_SHD_PROCESS_NAME "--process-name"
char *shd_svc_name = "glustershd";
@@ -23,52 +24,186 @@ char *shd_svc_name = "glustershd";
void
glusterd_shdsvc_build(glusterd_svc_t *svc)
{
+ int ret = -1;
+ ret = snprintf(svc->name, sizeof(svc->name), "%s", shd_svc_name);
+ if (ret < 0)
+ return;
+
+ CDS_INIT_LIST_HEAD(&svc->mux_svc);
svc->manager = glusterd_shdsvc_manager;
svc->start = glusterd_shdsvc_start;
- svc->stop = glusterd_svc_stop;
+ svc->stop = glusterd_shdsvc_stop;
+ svc->reconfigure = glusterd_shdsvc_reconfigure;
}
int
-glusterd_shdsvc_init(glusterd_svc_t *svc)
+glusterd_shdsvc_init(void *data, glusterd_conn_t *mux_conn,
+ glusterd_svc_proc_t *mux_svc)
{
- return glusterd_svc_init(svc, shd_svc_name);
+ int ret = -1;
+ char rundir[PATH_MAX] = {
+ 0,
+ };
+ char sockpath[PATH_MAX] = {
+ 0,
+ };
+ char pidfile[PATH_MAX] = {
+ 0,
+ };
+ char volfile[PATH_MAX] = {
+ 0,
+ };
+ char logdir[PATH_MAX] = {
+ 0,
+ };
+ char logfile[PATH_MAX] = {
+ 0,
+ };
+ char volfileid[256] = {0};
+ glusterd_svc_t *svc = NULL;
+ glusterd_volinfo_t *volinfo = NULL;
+ glusterd_conf_t *priv = NULL;
+ glusterd_muxsvc_conn_notify_t notify = NULL;
+ xlator_t *this = NULL;
+ char *volfileserver = NULL;
+ int32_t len = 0;
+
+ this = THIS;
+ GF_VALIDATE_OR_GOTO(THIS->name, this, out);
+
+ priv = this->private;
+ GF_VALIDATE_OR_GOTO(this->name, priv, out);
+
+ volinfo = data;
+ GF_VALIDATE_OR_GOTO(this->name, data, out);
+ GF_VALIDATE_OR_GOTO(this->name, mux_svc, out);
+
+ svc = &(volinfo->shd.svc);
+
+ ret = snprintf(svc->name, sizeof(svc->name), "%s", shd_svc_name);
+ if (ret < 0)
+ goto out;
+
+ notify = glusterd_muxsvc_common_rpc_notify;
+ glusterd_store_perform_node_state_store(volinfo);
+
+ GLUSTERD_GET_SHD_RUNDIR(rundir, volinfo, priv);
+ glusterd_svc_create_rundir(rundir);
+
+ glusterd_svc_build_logfile_path(shd_svc_name, priv->logdir, logfile,
+ sizeof(logfile));
+
+ /* Initialize the connection mgmt */
+ if (mux_conn && mux_svc->rpc) {
+ /* multiplexed svc */
+ svc->conn.frame_timeout = mux_conn->frame_timeout;
+ /* This will be unrefed from glusterd_shd_svcproc_cleanup*/
+ svc->conn.rpc = rpc_clnt_ref(mux_svc->rpc);
+ ret = snprintf(svc->conn.sockpath, sizeof(svc->conn.sockpath), "%s",
+ mux_conn->sockpath);
+ if (ret < 0)
+ goto out;
+ } else {
+ ret = mkdir_p(priv->logdir, 0755, _gf_true);
+ if ((ret == -1) && (EEXIST != errno)) {
+ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_CREATE_DIR_FAILED,
+ "Unable to create logdir %s", logdir);
+ goto out;
+ }
+
+ glusterd_svc_build_shd_socket_filepath(volinfo, sockpath,
+ sizeof(sockpath));
+ ret = glusterd_muxsvc_conn_init(&(svc->conn), mux_svc, sockpath, 600,
+ notify);
+ if (ret)
+ goto out;
+ /* This will be unrefed when the last svcs is detached from the list */
+ if (!mux_svc->rpc)
+ mux_svc->rpc = rpc_clnt_ref(svc->conn.rpc);
+ }
+
+ /* Initialize the process mgmt */
+ glusterd_svc_build_shd_pidfile(volinfo, pidfile, sizeof(pidfile));
+ glusterd_svc_build_shd_volfile_path(volinfo, volfile, PATH_MAX);
+ len = snprintf(volfileid, sizeof(volfileid), "shd/%s", volinfo->volname);
+ if ((len < 0) || (len >= sizeof(volfileid))) {
+ ret = -1;
+ goto out;
+ }
+
+ if (dict_get_strn(this->options, "transport.socket.bind-address",
+ SLEN("transport.socket.bind-address"),
+ &volfileserver) != 0) {
+ volfileserver = "localhost";
+ }
+ ret = glusterd_proc_init(&(svc->proc), shd_svc_name, pidfile, logdir,
+ logfile, volfile, volfileid, volfileserver);
+ if (ret)
+ goto out;
+
+out:
+ gf_msg_debug(this ? this->name : "glusterd", 0, "Returning %d", ret);
+ return ret;
}
-static int
-glusterd_shdsvc_create_volfile()
+int
+glusterd_shdsvc_create_volfile(glusterd_volinfo_t *volinfo)
{
char filepath[PATH_MAX] = {
0,
};
+
int ret = -1;
- glusterd_conf_t *conf = THIS->private;
dict_t *mod_dict = NULL;
-
+ xlator_t *this = THIS;
+ GF_ASSERT(this);
+
+ glusterd_svc_build_shd_volfile_path(volinfo, filepath, PATH_MAX);
+ if (!glusterd_is_shd_compatible_volume(volinfo)) {
+ /* If volfile exist, delete it. This case happens when we
+ * change from replica/ec to distribute.
+ */
+ (void)glusterd_unlink_file(filepath);
+ ret = 0;
+ goto out;
+ }
mod_dict = dict_new();
- if (!mod_dict)
+ if (!mod_dict) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL);
goto out;
+ }
ret = dict_set_uint32(mod_dict, "cluster.background-self-heal-count", 0);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=cluster.background-self-heal-count", NULL);
goto out;
+ }
ret = dict_set_str(mod_dict, "cluster.data-self-heal", "on");
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=cluster.data-self-heal", NULL);
goto out;
+ }
ret = dict_set_str(mod_dict, "cluster.metadata-self-heal", "on");
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=cluster.metadata-self-heal", NULL);
goto out;
+ }
ret = dict_set_str(mod_dict, "cluster.entry-self-heal", "on");
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=cluster.entry-self-heal", NULL);
goto out;
+ }
- glusterd_svc_build_volfile_path(shd_svc_name, conf->workdir, filepath,
- sizeof(filepath));
- ret = glusterd_create_global_volfile(build_shd_graph, filepath, mod_dict);
+ ret = glusterd_shdsvc_generate_volfile(volinfo, filepath, mod_dict);
if (ret) {
- gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_VOLFILE_CREATE_FAIL,
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLFILE_CREATE_FAIL,
"Failed to create volfile");
goto out;
}
@@ -76,31 +211,109 @@ glusterd_shdsvc_create_volfile()
out:
if (mod_dict)
dict_unref(mod_dict);
- gf_msg_debug(THIS->name, 0, "Returning %d", ret);
+ gf_msg_debug(this->name, 0, "Returning %d", ret);
return ret;
}
+/*
+ * Report whether every shd-compatible volume multiplexed onto the same shd
+ * process as @svc is stopped.
+ *
+ * The services attached to svc->svc_proc are walked while holding
+ * conf->attach_lock. Volumes that are not shd compatible are ignored.
+ *
+ * Returns _gf_true when no shd-compatible volume on that list is in
+ * GLUSTERD_STATUS_STARTED. Returns _gf_false when one is started, when @svc
+ * has no svc_proc, or when conf/@svc fail validation.
+ */
+gf_boolean_t
+glusterd_svcs_shd_compatible_volumes_stopped(glusterd_svc_t *svc)
+{
+    glusterd_svc_proc_t *svc_proc = NULL;
+    glusterd_shdsvc_t *shd = NULL;
+    glusterd_svc_t *temp_svc = NULL;
+    glusterd_volinfo_t *volinfo = NULL;
+    gf_boolean_t comp = _gf_false;
+    glusterd_conf_t *conf = THIS->private;
+
+    GF_VALIDATE_OR_GOTO("glusterd", conf, out);
+    GF_VALIDATE_OR_GOTO("glusterd", svc, out);
+    pthread_mutex_lock(&conf->attach_lock);
+    {
+        svc_proc = svc->svc_proc;
+        if (!svc_proc)
+            goto unlock;
+        cds_list_for_each_entry(temp_svc, &svc_proc->svcs, mux_svc)
+        {
+            /* Get volinfo->shd from the svc currently being visited.
+             * This must be the list cursor, not the caller's svc, or every
+             * iteration would re-check the same volume.
+             */
+            shd = cds_list_entry(temp_svc, glusterd_shdsvc_t, svc);
+            if (!shd) {
+                gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_SHD_OBJ_GET_FAIL,
+                       "Failed to get shd object "
+                       "from shd service");
+                goto unlock;
+            }
+
+            /* Get volinfo from shd */
+            volinfo = cds_list_entry(shd, glusterd_volinfo_t, shd);
+            if (!volinfo) {
+                gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_VOLINFO_GET_FAIL,
+                       "Failed to get volinfo from "
+                       "from shd");
+                goto unlock;
+            }
+            if (!glusterd_is_shd_compatible_volume(volinfo))
+                continue;
+            /* One started shd-compatible volume is enough to answer "no" */
+            if (volinfo->status == GLUSTERD_STATUS_STARTED)
+                goto unlock;
+        }
+        comp = _gf_true;
+    }
+unlock:
+    pthread_mutex_unlock(&conf->attach_lock);
+out:
+    return comp;
+}
+
int
glusterd_shdsvc_manager(glusterd_svc_t *svc, void *data, int flags)
{
- int ret = 0;
+ int ret = -1;
glusterd_volinfo_t *volinfo = NULL;
+ glusterd_conf_t *conf = NULL;
+ gf_boolean_t shd_restart = _gf_false;
- if (!svc->inited) {
- ret = glusterd_shdsvc_init(svc);
- if (ret) {
- gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_FAILED_INIT_SHDSVC,
- "Failed to init shd "
- "service");
- goto out;
- } else {
- svc->inited = _gf_true;
- gf_msg_debug(THIS->name, 0, "shd service initialized");
+ conf = THIS->private;
+ GF_VALIDATE_OR_GOTO("glusterd", conf, out);
+ GF_VALIDATE_OR_GOTO("glusterd", svc, out);
+ volinfo = data;
+ GF_VALIDATE_OR_GOTO("glusterd", volinfo, out);
+
+ if (volinfo->is_snap_volume) {
+ /* healing of a snap volume is not supported yet*/
+ ret = 0;
+ goto out;
+ }
+
+ while (conf->restart_shd) {
+ synccond_wait(&conf->cond_restart_shd, &conf->big_lock);
+ }
+ conf->restart_shd = _gf_true;
+ shd_restart = _gf_true;
+
+ if (volinfo)
+ glusterd_volinfo_ref(volinfo);
+
+ if (!glusterd_is_shd_compatible_volume(volinfo)) {
+ ret = 0;
+ if (svc->inited) {
+ /* This means glusterd was running for this volume and now
+ * it was converted to a non-shd volume. So just stop the shd
+ */
+ ret = svc->stop(svc, SIGTERM);
}
+ goto out;
}
+ ret = glusterd_shdsvc_create_volfile(volinfo);
+ if (ret)
+ goto out;
- volinfo = data;
+ ret = glusterd_shd_svc_mux_init(volinfo, svc);
+ if (ret) {
+ gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_FAILED_INIT_SHDSVC,
+ "Failed to init shd service");
+ goto out;
+ }
/* If all the volumes are stopped or all shd compatible volumes
* are stopped then stop the service if:
@@ -110,31 +323,31 @@ glusterd_shdsvc_manager(glusterd_svc_t *svc, void *data, int flags)
* - volinfo is NULL or
* - volinfo is present and volume is shd compatible
*/
- if (glusterd_are_all_volumes_stopped() ||
- glusterd_all_shd_compatible_volumes_stopped()) {
- if (!(volinfo && !glusterd_is_shd_compatible_volume(volinfo))) {
- ret = svc->stop(svc, SIGTERM);
- }
- } else {
- if (!(volinfo && !glusterd_is_shd_compatible_volume(volinfo))) {
- ret = glusterd_shdsvc_create_volfile();
- if (ret)
- goto out;
-
+ if (glusterd_svcs_shd_compatible_volumes_stopped(svc)) {
+ /* TODO
+ * Take a lock and detach all svc's to stop the process
+ * also reset the init flag
+ */
+ ret = svc->stop(svc, SIGTERM);
+ } else if (volinfo) {
+ if (volinfo->status != GLUSTERD_STATUS_STARTED) {
ret = svc->stop(svc, SIGTERM);
if (ret)
goto out;
-
+ }
+ if (volinfo->status == GLUSTERD_STATUS_STARTED) {
ret = svc->start(svc, flags);
if (ret)
goto out;
-
- ret = glusterd_conn_connect(&(svc->conn));
- if (ret)
- goto out;
}
}
out:
+ if (shd_restart) {
+ conf->restart_shd = _gf_false;
+ synccond_broadcast(&conf->cond_restart_shd);
+ }
+ if (volinfo)
+ glusterd_volinfo_unref(volinfo);
if (ret)
gf_event(EVENT_SVC_MANAGER_FAILED, "svc_name=%s", svc->name);
gf_msg_debug(THIS->name, 0, "Returning %d", ret);
@@ -143,11 +356,14 @@ out:
}
int
-glusterd_shdsvc_start(glusterd_svc_t *svc, int flags)
+glusterd_new_shd_svc_start(glusterd_svc_t *svc, int flags)
{
int ret = -1;
char glusterd_uuid_option[PATH_MAX] = {0};
+ char client_pid[32] = {0};
dict_t *cmdline = NULL;
+ xlator_t *this = THIS;
+ GF_ASSERT(this);
cmdline = dict_new();
if (!cmdline)
@@ -158,51 +374,190 @@ glusterd_shdsvc_start(glusterd_svc_t *svc, int flags)
if (ret < 0)
goto out;
+ ret = snprintf(client_pid, sizeof(client_pid), "--client-pid=%d",
+ GF_CLIENT_PID_SELF_HEALD);
+ if (ret < 0)
+ goto out;
+
+ ret = dict_set_str(cmdline, "arg", client_pid);
+ if (ret < 0) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=arg", NULL);
+ goto out;
+ }
+
/* Pass cmdline arguments as key-value pair. The key is merely
* a carrier and is not used. Since dictionary follows LIFO the value
* should be put in reverse order*/
ret = dict_set_str(cmdline, "arg4", svc->name);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=arg4", NULL);
goto out;
+ }
ret = dict_set_str(cmdline, "arg3", GD_SHD_PROCESS_NAME);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=arg3", NULL);
goto out;
+ }
ret = dict_set_str(cmdline, "arg2", glusterd_uuid_option);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=arg2", NULL);
goto out;
+ }
ret = dict_set_str(cmdline, "arg1", "--xlator-option");
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=arg1", NULL);
goto out;
+ }
ret = glusterd_svc_start(svc, flags, cmdline);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ GD_MSG_GLUSTER_SERVICE_START_FAIL, NULL);
+ goto out;
+ }
+ ret = glusterd_conn_connect(&(svc->conn));
out:
if (cmdline)
dict_unref(cmdline);
+ return ret;
+}
+
+/*
+ * Recover from a failed attach of a volume's shd service: tear down the
+ * volume's current shd service state, create a fresh service-process object,
+ * re-initialise the service against it (no mux connection), register both on
+ * the global lists under conf->attach_lock, and start a new shd process.
+ *
+ * Returns 0 on success (and marks the shd as attached), -1 on invalid
+ * arguments, allocation failure or init failure, otherwise the result of
+ * glusterd_new_shd_svc_start().
+ */
+int
+glusterd_recover_shd_attach_failure(glusterd_volinfo_t *volinfo,
+                                    glusterd_svc_t *svc, int flags)
+{
+    glusterd_conf_t *priv = THIS->private;
+    glusterd_svc_proc_t *fresh_proc = NULL;
+    int rc = -1;
+
+    if (priv == NULL || volinfo == NULL || svc == NULL)
+        return -1;
+
+    glusterd_shd_svcproc_cleanup(&volinfo->shd);
+
+    fresh_proc = glusterd_svcprocess_new();
+    if (fresh_proc == NULL)
+        return -1;
+
+    /* NOTE(review): fresh_proc is not released on this failure path;
+     * confirm whether it needs explicit teardown here.
+     */
+    if (glusterd_shdsvc_init(volinfo, NULL, fresh_proc) != 0)
+        return -1;
+
+    pthread_mutex_lock(&priv->attach_lock);
+    {
+        cds_list_add_tail(&fresh_proc->svc_proc_list, &priv->shd_procs);
+        svc->svc_proc = fresh_proc;
+        /* Move the svc from whatever list it is on to the new process */
+        cds_list_del_init(&svc->mux_svc);
+        cds_list_add_tail(&svc->mux_svc, &fresh_proc->svcs);
+    }
+    pthread_mutex_unlock(&priv->attach_lock);
+
+    rc = glusterd_new_shd_svc_start(svc, flags);
+    if (rc == 0)
+        volinfo->shd.attached = _gf_true;
+
+    return rc;
+}
+
+/*
+ * Start the self-heal daemon service for the volume that owns @svc.
+ *
+ * The owning glusterd_shdsvc_t and glusterd_volinfo_t are recovered from
+ * @svc by container arithmetic. Nothing is started unless the volume is in
+ * GLUSTERD_STATUS_STARTED. An uninitialised service is first set up through
+ * glusterd_shd_svc_mux_init(). A service already marked attached is attached
+ * via glusterd_attach_svc(); otherwise a new shd process is started.
+ *
+ * Returns 0 on success, non-zero on failure.
+ */
+int
+glusterd_shdsvc_start(glusterd_svc_t *svc, int flags)
+{
+    glusterd_conf_t *priv = NULL;
+    glusterd_shdsvc_t *shd_obj = NULL;
+    glusterd_volinfo_t *vol = NULL;
+    int ret = -1;
+
+    GF_VALIDATE_OR_GOTO("glusterd", svc, unref_vol);
+    priv = THIS->private;
+    GF_VALIDATE_OR_GOTO("glusterd", priv, unref_vol);
+
+    /* svc is embedded in glusterd_shdsvc_t ... */
+    shd_obj = cds_list_entry(svc, glusterd_shdsvc_t, svc);
+    if (shd_obj == NULL) {
+        gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_SHD_OBJ_GET_FAIL,
+               "Failed to get shd object from shd service");
+        return -1;
+    }
+    /* ... which in turn is embedded in glusterd_volinfo_t */
+    vol = cds_list_entry(shd_obj, glusterd_volinfo_t, shd);
+    if (vol == NULL) {
+        gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_VOLINFO_GET_FAIL,
+               "Failed to get volinfo from from shd");
+        return -1;
+    }
+
+    if (vol->status != GLUSTERD_STATUS_STARTED)
+        return -1;
+
+    /* Reference held for the duration of this function */
+    glusterd_volinfo_ref(vol);
+
+    if (!svc->inited) {
+        ret = glusterd_shd_svc_mux_init(vol, svc);
+        if (ret)
+            goto unref_vol;
+    }
+
+    if (!shd_obj->attached) {
+        ret = glusterd_new_shd_svc_start(svc, flags);
+        if (ret == 0)
+            shd_obj->attached = _gf_true;
+        goto unref_vol;
+    }
+
+    /* Extra reference for the attach request; unref will happen from
+     * glusterd_svc_attach_cbk.
+     */
+    glusterd_volinfo_ref(vol);
+    ret = glusterd_attach_svc(svc, vol, flags);
+    if (ret) {
+        gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_VOLINFO_GET_FAIL,
+               "Failed to attach shd svc(volume=%s) to pid=%d", vol->volname,
+               glusterd_proc_get_pid(&svc->proc));
+        glusterd_shd_svcproc_cleanup(&vol->shd);
+        /* NOTE(review): only one of the two references taken above is
+         * dropped before skipping the common unref below - confirm the
+         * other one is released elsewhere on this path.
+         */
+        glusterd_volinfo_unref(vol);
+        goto log_ret;
+    }
+unref_vol:
+    if (ret && vol)
+        glusterd_shd_svcproc_cleanup(&vol->shd);
+    if (vol)
+        glusterd_volinfo_unref(vol);
+log_ret:
gf_msg_debug(THIS->name, 0, "Returning %d", ret);
return ret;
}
int
-glusterd_shdsvc_reconfigure()
+glusterd_shdsvc_reconfigure(glusterd_volinfo_t *volinfo)
{
int ret = -1;
xlator_t *this = NULL;
- glusterd_conf_t *priv = NULL;
gf_boolean_t identical = _gf_false;
+ dict_t *mod_dict = NULL;
+ glusterd_svc_t *svc = NULL;
this = THIS;
GF_VALIDATE_OR_GOTO("glusterd", this, out);
- priv = this->private;
- GF_VALIDATE_OR_GOTO(this->name, priv, out);
+ if (!volinfo) {
+ /* reconfigure will be called separately*/
+ ret = 0;
+ goto out;
+ }
- if (glusterd_all_shd_compatible_volumes_stopped())
+ glusterd_volinfo_ref(volinfo);
+ svc = &(volinfo->shd.svc);
+ if (glusterd_svcs_shd_compatible_volumes_stopped(svc))
goto manager;
/*
@@ -210,8 +565,59 @@ glusterd_shdsvc_reconfigure()
* and cksum i.e. "character-by-character". If YES, then
* NOTHING has been changed, just return.
*/
- ret = glusterd_svc_check_volfile_identical(priv->shd_svc.name,
- build_shd_graph, &identical);
+
+ if (!glusterd_is_shd_compatible_volume(volinfo)) {
+ if (svc->inited)
+ goto manager;
+
+ /* Nothing to do if not shd compatible */
+ ret = 0;
+ goto out;
+ }
+ mod_dict = dict_new();
+ if (!mod_dict) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL);
+ goto out;
+ }
+
+ ret = dict_set_uint32(mod_dict, "cluster.background-self-heal-count", 0);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=cluster.background-self-heal-count", NULL);
+ goto out;
+ }
+
+ ret = dict_set_str(mod_dict, "cluster.data-self-heal", "on");
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=cluster.data-self-heal", NULL);
+ goto out;
+ }
+
+ ret = dict_set_str(mod_dict, "cluster.metadata-self-heal", "on");
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=cluster.metadata-self-heal", NULL);
+ goto out;
+ }
+
+ ret = dict_set_int32(mod_dict, "graph-check", 1);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=graph-check", NULL);
+ goto out;
+ }
+
+ ret = dict_set_str(mod_dict, "cluster.entry-self-heal", "on");
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=cluster.entry-self-heal", NULL);
+ goto out;
+ }
+
+ ret = glusterd_volume_svc_check_volfile_identical(
+ "glustershd", mod_dict, volinfo, glusterd_shdsvc_generate_volfile,
+ &identical);
if (ret)
goto out;
@@ -226,8 +632,9 @@ glusterd_shdsvc_reconfigure()
* changed, then inform the xlator to reconfigure the options.
*/
identical = _gf_false; /* RESET the FLAG */
- ret = glusterd_svc_check_topology_identical(priv->shd_svc.name,
- build_shd_graph, &identical);
+ ret = glusterd_volume_svc_check_topology_identical(
+ "glustershd", mod_dict, volinfo, glusterd_shdsvc_generate_volfile,
+ &identical);
if (ret)
goto out;
@@ -235,7 +642,7 @@ glusterd_shdsvc_reconfigure()
* options to shd volfile, so that shd will be reconfigured.
*/
if (identical) {
- ret = glusterd_shdsvc_create_volfile();
+ ret = glusterd_shdsvc_create_volfile(volinfo);
if (ret == 0) { /* Only if above PASSES */
ret = glusterd_fetchspec_notify(THIS);
}
@@ -243,12 +650,147 @@ glusterd_shdsvc_reconfigure()
}
manager:
/*
- * shd volfile's topology has been changed. shd server needs
- * to be RESTARTED to ACT on the changed volfile.
+ * shd volfile's topology has been changed. volfile needs
+ * to be RECONFIGURED to ACT on the changed volfile.
*/
- ret = priv->shd_svc.manager(&(priv->shd_svc), NULL, PROC_START_NO_WAIT);
+ ret = svc->manager(svc, volinfo, PROC_START_NO_WAIT);
out:
+ if (volinfo)
+ glusterd_volinfo_unref(volinfo);
+ if (mod_dict)
+ dict_unref(mod_dict);
gf_msg_debug(this ? this->name : "glusterd", 0, "Returning %d", ret);
return ret;
}
+
+/*
+ * Run the shd service manager for every volume in GLUSTERD_STATUS_STARTED.
+ *
+ * conf->volume_lock protects the walk over conf->volumes; it is dropped
+ * (with a reference held on the current volume) while the manager runs and
+ * re-taken before advancing. The walk stops at the first manager failure.
+ *
+ * Returns the result of the last manager call.
+ * NOTE(review): the result stays -1 when no volume is in the started state -
+ * confirm callers expect that.
+ */
+int
+glusterd_shdsvc_restart()
+{
+    xlator_t *xl = THIS;
+    glusterd_conf_t *priv = NULL;
+    glusterd_volinfo_t *vol = NULL;
+    glusterd_volinfo_t *next_vol = NULL;
+    glusterd_svc_t *shd_svc = NULL;
+    int rc = -1;
+
+    GF_VALIDATE_OR_GOTO("glusterd", xl, done);
+
+    priv = xl->private;
+    GF_VALIDATE_OR_GOTO(xl->name, priv, done);
+
+    pthread_mutex_lock(&priv->volume_lock);
+    cds_list_for_each_entry_safe(vol, next_vol, &priv->volumes, vol_list)
+    {
+        /* Pin the volume, then release the list lock for the manager call */
+        glusterd_volinfo_ref(vol);
+        pthread_mutex_unlock(&priv->volume_lock);
+
+        if (vol->status == GLUSTERD_STATUS_STARTED) {
+            shd_svc = &vol->shd.svc;
+            rc = shd_svc->manager(shd_svc, vol, PROC_START_NO_WAIT);
+            if (rc != 0) {
+                gf_msg(xl->name, GF_LOG_ERROR, 0, GD_MSG_SHD_START_FAIL,
+                       "Couldn't start shd for vol: %s on restart",
+                       vol->volname);
+                gf_event(EVENT_SVC_MANAGER_FAILED, "volume=%s;svc_name=%s",
+                         vol->volname, shd_svc->name);
+                /* The list lock is not held here, so just unpin and leave */
+                glusterd_volinfo_unref(vol);
+                goto done;
+            }
+        }
+
+        glusterd_volinfo_unref(vol);
+        pthread_mutex_lock(&priv->volume_lock);
+    }
+    pthread_mutex_unlock(&priv->volume_lock);
+done:
+    return rc;
+}
+
+/*
+ * Stop the self-heal daemon service of the volume that owns @svc.
+ *
+ * The svc is removed from its process's list of multiplexed services under
+ * conf->attach_lock. If that leaves the list empty, the process itself is
+ * stopped with signal @sig through glusterd_svc_stop(); otherwise, when a
+ * pid was obtained from the pidfile check, only this volume is detached from
+ * the process. The svc is then marked offline, its pidfile is unlinked and
+ * the per-volume shd state is cleaned up.
+ *
+ * Returns 0 on success, or when the svc has no process (already stopped);
+ * -1 on validation failure; otherwise the non-zero result of
+ * glusterd_svc_stop(). A detach failure is logged but not returned.
+ */
+int
+glusterd_shdsvc_stop(glusterd_svc_t *svc, int sig)
+{
+    int ret = -1;
+    glusterd_svc_proc_t *svc_proc = NULL;
+    glusterd_shdsvc_t *shd = NULL;
+    glusterd_volinfo_t *volinfo = NULL;
+    gf_boolean_t empty = _gf_false;
+    glusterd_conf_t *conf = NULL;
+    int pid = -1;
+
+    conf = THIS->private;
+    GF_VALIDATE_OR_GOTO("glusterd", conf, out);
+    GF_VALIDATE_OR_GOTO("glusterd", svc, out);
+    svc_proc = svc->svc_proc;
+    if (!svc_proc) {
+        /*
+         * This can happen when stop was called on a volume that is not shd
+         * compatible.
+         */
+        gf_msg_debug("glusterd", 0, "svc_proc is null, ie shd already stopped");
+        ret = 0;
+        goto out;
+    }
+
+    /* Get volinfo->shd from svc object */
+    shd = cds_list_entry(svc, glusterd_shdsvc_t, svc);
+    if (!shd) {
+        gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_SHD_OBJ_GET_FAIL,
+               "Failed to get shd object "
+               "from shd service");
+        return -1;
+    }
+
+    /* Get volinfo from shd */
+    volinfo = cds_list_entry(shd, glusterd_volinfo_t, shd);
+    if (!volinfo) {
+        gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_VOLINFO_GET_FAIL,
+               "Failed to get volinfo from "
+               "from shd");
+        return -1;
+    }
+
+    /* Reference held for the duration of this function; dropped at unref: */
+    glusterd_volinfo_ref(volinfo);
+    pthread_mutex_lock(&conf->attach_lock);
+    {
+        if (!gf_is_service_running(svc->proc.pidfile, &pid)) {
+            gf_msg_debug(THIS->name, 0, "shd isn't running");
+        }
+        cds_list_del_init(&svc->mux_svc);
+        empty = cds_list_empty(&svc_proc->svcs);
+        if (empty) {
+            /* Last volume on this process: take the process off the list */
+            svc_proc->status = GF_SVC_STOPPING;
+            cds_list_del_init(&svc_proc->svc_proc_list);
+        }
+    }
+    pthread_mutex_unlock(&conf->attach_lock);
+    if (empty) {
+        /* Unref will happen when destroying the connection */
+        glusterd_volinfo_ref(volinfo);
+        svc_proc->data = volinfo;
+        ret = glusterd_svc_stop(svc, sig);
+        if (ret) {
+            /* Drop the connection reference here, then fall into the
+             * common unref so the function-scope reference is released
+             * as well instead of being leaked.
+             */
+            glusterd_volinfo_unref(volinfo);
+            goto unref;
+        }
+    }
+    if (!empty && pid != -1) {
+        ret = glusterd_detach_svc(svc, volinfo, sig);
+        if (ret)
+            gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_SVC_STOP_FAIL,
+                   "shd service is failed to detach volume %s from pid %d",
+                   volinfo->volname, glusterd_proc_get_pid(&svc->proc));
+        else
+            gf_msg(THIS->name, GF_LOG_INFO, 0, GD_MSG_SVC_STOP_SUCCESS,
+                   "Shd service is detached for volume %s from pid %d",
+                   volinfo->volname, glusterd_proc_get_pid(&svc->proc));
+    }
+    svc->online = _gf_false;
+    (void)glusterd_unlink_file((char *)svc->proc.pidfile);
+    glusterd_shd_svcproc_cleanup(shd);
+    ret = 0;
+unref:
+    glusterd_volinfo_unref(volinfo);
+out:
+    gf_msg_debug(THIS->name, 0, "Returning %d", ret);
+    return ret;
+}
diff --git a/xlators/mgmt/glusterd/src/glusterd-shd-svc.h b/xlators/mgmt/glusterd/src/glusterd-shd-svc.h
index 775a9d44a2c..55b409f4b69 100644
--- a/xlators/mgmt/glusterd/src/glusterd-shd-svc.h
+++ b/xlators/mgmt/glusterd/src/glusterd-shd-svc.h
@@ -12,12 +12,20 @@
#define _GLUSTERD_SHD_SVC_H_
#include "glusterd-svc-mgmt.h"
+#include "glusterd.h"
+
+typedef struct glusterd_shdsvc_ glusterd_shdsvc_t;
+struct glusterd_shdsvc_ {
+ glusterd_svc_t svc;
+ gf_boolean_t attached;
+};
void
glusterd_shdsvc_build(glusterd_svc_t *svc);
int
-glusterd_shdsvc_init(glusterd_svc_t *svc);
+glusterd_shdsvc_init(void *data, glusterd_conn_t *mux_conn,
+ glusterd_svc_proc_t *svc_proc);
int
glusterd_shdsvc_manager(glusterd_svc_t *svc, void *data, int flags);
@@ -27,4 +35,11 @@ glusterd_shdsvc_start(glusterd_svc_t *svc, int flags);
int
glusterd_shdsvc_reconfigure();
+
+int
+glusterd_shdsvc_restart();
+
+int
+glusterd_shdsvc_stop(glusterd_svc_t *svc, int sig);
+
#endif
diff --git a/xlators/mgmt/glusterd/src/glusterd-sm.c b/xlators/mgmt/glusterd/src/glusterd-sm.c
index 35bc71455d2..bf2d81b644a 100644
--- a/xlators/mgmt/glusterd/src/glusterd-sm.c
+++ b/xlators/mgmt/glusterd/src/glusterd-sm.c
@@ -13,20 +13,20 @@
#include <sys/resource.h>
#include <libgen.h>
-#include "compat-uuid.h"
+#include <glusterfs/compat-uuid.h>
#include "fnmatch.h"
-#include "xlator.h"
+#include <glusterfs/xlator.h>
#include "protocol-common.h"
#include "glusterd.h"
-#include "call-stub.h"
-#include "defaults.h"
-#include "list.h"
+#include <glusterfs/call-stub.h>
+#include <glusterfs/defaults.h>
+#include <glusterfs/list.h>
#include "glusterd-messages.h"
-#include "dict.h"
-#include "compat.h"
-#include "compat-errno.h"
-#include "statedump.h"
+#include <glusterfs/dict.h>
+#include <glusterfs/compat.h>
+#include <glusterfs/compat-errno.h>
+#include <glusterfs/statedump.h>
#include "glusterd-sm.h"
#include "glusterd-op-sm.h"
#include "glusterd-utils.h"
@@ -146,24 +146,35 @@ glusterd_broadcast_friend_delete(char *hostname, uuid_t uuid)
ctx.op = GD_FRIEND_UPDATE_DEL;
friends = dict_new();
- if (!friends)
+ if (!friends) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL);
goto out;
+ }
keylen = snprintf(key, sizeof(key), "op");
ret = dict_set_int32n(friends, key, keylen, ctx.op);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=%s", key, NULL);
goto out;
+ }
keylen = snprintf(key, sizeof(key), "hostname");
ret = dict_set_strn(friends, key, keylen, hostname);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=%s", key, NULL);
goto out;
+ }
ret = dict_set_int32n(friends, "count", SLEN("count"), count);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=%s", key, NULL);
goto out;
+ }
- rcu_read_lock();
+ RCU_READ_LOCK;
cds_list_for_each_entry_rcu(peerinfo, &priv->peers, uuid_list)
{
if (!peerinfo->connected || !peerinfo->peer)
@@ -175,9 +186,10 @@ glusterd_broadcast_friend_delete(char *hostname, uuid_t uuid)
*/
ret = dict_set_static_ptr(friends, "peerinfo", peerinfo);
if (ret) {
+ RCU_READ_UNLOCK;
gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
"failed to set peerinfo");
- goto unlock;
+ goto out;
}
proc = &peerinfo->peer->proctable[GLUSTERD_FRIEND_UPDATE];
@@ -185,15 +197,13 @@ glusterd_broadcast_friend_delete(char *hostname, uuid_t uuid)
ret = proc->fn(NULL, this, friends);
}
}
-unlock:
- rcu_read_unlock();
-
- gf_msg_debug("glusterd", 0, "Returning with %d", ret);
+ RCU_READ_UNLOCK;
out:
if (friends)
dict_unref(friends);
+ gf_msg_debug("glusterd", 0, "Returning with %d", ret);
return ret;
}
@@ -229,29 +239,32 @@ glusterd_ac_reverse_probe_begin(glusterd_friend_sm_event_t *event, void *ctx)
GF_ASSERT(event);
GF_ASSERT(ctx);
- rcu_read_lock();
+ new_ev_ctx = GF_CALLOC(1, sizeof(*new_ev_ctx), gf_gld_mt_probe_ctx_t);
+
+ RCU_READ_LOCK;
peerinfo = glusterd_peerinfo_find(event->peerid, event->peername);
if (!peerinfo) {
+ RCU_READ_UNLOCK;
+ ret = -1;
gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_PEER_NOT_FOUND,
"Could not find peer %s(%s)", event->peername,
uuid_utoa(event->peerid));
- ret = -1;
goto out;
}
ret = glusterd_friend_sm_new_event(GD_FRIEND_EVENT_PROBE, &new_event);
if (ret) {
+ RCU_READ_UNLOCK;
gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_EVENT_NEW_GET_FAIL,
"Unable to get new new_event");
ret = -1;
goto out;
}
- new_ev_ctx = GF_CALLOC(1, sizeof(*new_ev_ctx), gf_gld_mt_probe_ctx_t);
-
if (!new_ev_ctx) {
+ RCU_READ_UNLOCK;
ret = -1;
goto out;
}
@@ -266,6 +279,8 @@ glusterd_ac_reverse_probe_begin(glusterd_friend_sm_event_t *event, void *ctx)
ret = glusterd_friend_sm_inject_event(new_event);
+ RCU_READ_UNLOCK;
+
if (ret) {
gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_EVENT_INJECT_FAIL,
"Unable to inject new_event %d, "
@@ -274,8 +289,6 @@ glusterd_ac_reverse_probe_begin(glusterd_friend_sm_event_t *event, void *ctx)
}
out:
- rcu_read_unlock();
-
if (ret) {
if (new_event)
GF_FREE(new_event->peername);
@@ -305,31 +318,34 @@ glusterd_ac_friend_add(glusterd_friend_sm_event_t *event, void *ctx)
GF_ASSERT(conf);
- rcu_read_lock();
+ RCU_READ_LOCK;
peerinfo = glusterd_peerinfo_find(event->peerid, event->peername);
if (!peerinfo) {
+ RCU_READ_UNLOCK;
gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_PEER_NOT_FOUND,
"Could not find peer %s(%s)", event->peername,
uuid_utoa(event->peerid));
goto out;
}
- if (!peerinfo->peer)
+ if (!peerinfo->peer) {
+ RCU_READ_UNLOCK;
goto out;
+ }
proc = &peerinfo->peer->proctable[GLUSTERD_FRIEND_ADD];
if (proc->fn) {
frame = create_frame(this, this->ctx->pool);
if (!frame) {
+ RCU_READ_UNLOCK;
goto out;
}
frame->local = ctx;
ret = proc->fn(frame, this, event);
}
+ RCU_READ_UNLOCK;
out:
- rcu_read_unlock();
-
if (ret && frame)
STACK_DESTROY(frame->root);
@@ -361,34 +377,49 @@ glusterd_ac_friend_probe(glusterd_friend_sm_event_t *event, void *ctx)
GF_ASSERT(conf);
- rcu_read_lock();
+ RCU_READ_LOCK;
peerinfo = glusterd_peerinfo_find(NULL, probe_ctx->hostname);
if (peerinfo == NULL) {
// We should not reach this state ideally
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_PEER_NOT_FOUND, NULL);
ret = -1;
- goto out;
+ goto unlock;
}
- if (!peerinfo->peer)
- goto out;
+ if (!peerinfo->peer) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_PEER_ADDRESS_GET_FAIL,
+ NULL);
+ goto unlock;
+ }
proc = &peerinfo->peer->proctable[GLUSTERD_PROBE_QUERY];
if (proc->fn) {
frame = create_frame(this, this->ctx->pool);
if (!frame) {
- goto out;
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_FRAME_CREATE_FAIL,
+ NULL);
+ goto unlock;
}
frame->local = ctx;
dict = dict_new();
- if (!dict)
- goto out;
+ if (!dict) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL,
+ NULL);
+ goto unlock;
+ }
ret = dict_set_strn(dict, "hostname", SLEN("hostname"),
probe_ctx->hostname);
- if (ret)
- goto out;
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=hostname", NULL);
+ goto unlock;
+ }
ret = dict_set_int32n(dict, "port", SLEN("port"), probe_ctx->port);
- if (ret)
- goto out;
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=port", NULL);
+ goto unlock;
+ }
/* The peerinfo reference being set here is going to be used
* only within this critical section, in glusterd_rpc_probe
@@ -396,6 +427,7 @@ glusterd_ac_friend_probe(glusterd_friend_sm_event_t *event, void *ctx)
*/
ret = dict_set_static_ptr(dict, "peerinfo", peerinfo);
if (ret) {
+ RCU_READ_UNLOCK;
gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
"failed to set peerinfo");
goto out;
@@ -403,11 +435,11 @@ glusterd_ac_friend_probe(glusterd_friend_sm_event_t *event, void *ctx)
ret = proc->fn(frame, this, dict);
if (ret)
- goto out;
+ goto unlock;
}
-
+unlock:
+ RCU_READ_UNLOCK;
out:
- rcu_read_unlock();
if (dict)
dict_unref(dict);
@@ -440,10 +472,12 @@ glusterd_ac_send_friend_remove_req(glusterd_friend_sm_event_t *event,
GF_ASSERT(conf);
- rcu_read_lock();
+ RCU_READ_LOCK;
peerinfo = glusterd_peerinfo_find(event->peerid, event->peername);
if (!peerinfo) {
+ RCU_READ_UNLOCK;
+ ret = -1;
gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_PEER_NOT_FOUND,
"Could not find peer %s(%s)", event->peername,
uuid_utoa(event->peerid));
@@ -471,23 +505,29 @@ glusterd_ac_send_friend_remove_req(glusterd_friend_sm_event_t *event,
glusterd_broadcast_friend_delete(ctx->hostname, NULL);
glusterd_destroy_probe_ctx(ctx);
}
- goto out;
+ goto unlock;
}
- if (!peerinfo->peer)
- goto out;
+ if (!peerinfo->peer) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_PEER_ADDRESS_GET_FAIL,
+ NULL);
+ goto unlock;
+ }
proc = &peerinfo->peer->proctable[GLUSTERD_FRIEND_REMOVE];
if (proc->fn) {
frame = create_frame(this, this->ctx->pool);
if (!frame) {
- goto out;
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_FRAME_CREATE_FAIL,
+ NULL);
+ goto unlock;
}
frame->local = data;
ret = proc->fn(frame, this, event);
}
+unlock:
+ RCU_READ_UNLOCK;
out:
- rcu_read_unlock();
gf_msg_debug("glusterd", 0, "Returning with %d", ret);
@@ -501,13 +541,11 @@ static gf_boolean_t
glusterd_should_update_peer(glusterd_peerinfo_t *peerinfo,
glusterd_peerinfo_t *cur_peerinfo)
{
- gf_boolean_t is_valid = _gf_false;
-
if ((peerinfo == cur_peerinfo) ||
(peerinfo->state.state == GD_FRIEND_STATE_BEFRIENDED))
- is_valid = _gf_true;
+ return _gf_true;
- return is_valid;
+ return _gf_false;
}
static int
@@ -534,27 +572,33 @@ glusterd_ac_send_friend_update(glusterd_friend_sm_event_t *event, void *ctx)
GF_ASSERT(priv);
- rcu_read_lock();
+ keylen = snprintf(key, sizeof(key), "op");
+ friends = dict_new();
+
+ RCU_READ_LOCK;
cur_peerinfo = glusterd_peerinfo_find(event->peerid, event->peername);
if (!cur_peerinfo) {
+ RCU_READ_UNLOCK;
+ ret = -1;
gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_PEER_NOT_FOUND,
"Could not find peer %s(%s)", event->peername,
uuid_utoa(event->peerid));
- ret = -1;
goto out;
}
- ev_ctx.op = GD_FRIEND_UPDATE_ADD;
-
- friends = dict_new();
- if (!friends)
- goto out;
+ if (!friends) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL);
+ goto unlock;
+ }
- keylen = snprintf(key, sizeof(key), "op");
+ ev_ctx.op = GD_FRIEND_UPDATE_ADD;
ret = dict_set_int32n(friends, key, keylen, ev_ctx.op);
- if (ret)
- goto out;
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=%s", key, NULL);
+ goto unlock;
+ }
cds_list_for_each_entry_rcu(peerinfo, &priv->peers, uuid_list)
{
@@ -566,12 +610,15 @@ glusterd_ac_send_friend_update(glusterd_friend_sm_event_t *event, void *ctx)
snprintf(key, sizeof(key), "friend%d", count);
ret = gd_add_friend_to_dict(peerinfo, friends, key);
if (ret)
- goto out;
+ goto unlock;
}
ret = dict_set_int32n(friends, "count", SLEN("count"), count);
- if (ret)
- goto out;
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=count", NULL);
+ goto unlock;
+ }
cds_list_for_each_entry_rcu(peerinfo, &priv->peers, uuid_list)
{
@@ -583,6 +630,7 @@ glusterd_ac_send_friend_update(glusterd_friend_sm_event_t *event, void *ctx)
ret = dict_set_static_ptr(friends, "peerinfo", peerinfo);
if (ret) {
+ RCU_READ_UNLOCK;
gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
"failed to set peerinfo");
goto out;
@@ -594,14 +642,14 @@ glusterd_ac_send_friend_update(glusterd_friend_sm_event_t *event, void *ctx)
}
}
- gf_msg_debug("glusterd", 0, "Returning with %d", ret);
-
+unlock:
+ RCU_READ_UNLOCK;
out:
- rcu_read_unlock();
if (friends)
dict_unref(friends);
+ gf_msg_debug("glusterd", 0, "Returning with %d", ret);
return ret;
}
@@ -632,14 +680,18 @@ glusterd_ac_update_friend(glusterd_friend_sm_event_t *event, void *ctx)
GF_ASSERT(priv);
- rcu_read_lock();
+ friends = dict_new();
+ keylen = snprintf(key, sizeof(key), "op");
+
+ RCU_READ_LOCK;
cur_peerinfo = glusterd_peerinfo_find(event->peerid, event->peername);
if (!cur_peerinfo) {
+ RCU_READ_UNLOCK;
+ ret = -1;
gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_PEER_NOT_FOUND,
"Could not find peer %s(%s)", event->peername,
uuid_utoa(event->peerid));
- ret = -1;
goto out;
}
@@ -649,19 +701,21 @@ glusterd_ac_update_friend(glusterd_friend_sm_event_t *event, void *ctx)
*/
if (!cur_peerinfo->connected || !cur_peerinfo->peer) {
ret = 0;
- goto out;
+ goto unlock;
}
- ev_ctx.op = GD_FRIEND_UPDATE_ADD;
-
- friends = dict_new();
- if (!friends)
+ if (!friends) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL);
goto out;
+ }
- keylen = snprintf(key, sizeof(key), "op");
+ ev_ctx.op = GD_FRIEND_UPDATE_ADD;
ret = dict_set_int32n(friends, key, keylen, ev_ctx.op);
- if (ret)
- goto out;
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=%s", key, NULL);
+ goto unlock;
+ }
cds_list_for_each_entry_rcu(peerinfo, &priv->peers, uuid_list)
{
@@ -673,15 +727,19 @@ glusterd_ac_update_friend(glusterd_friend_sm_event_t *event, void *ctx)
snprintf(key, sizeof(key), "friend%d", count);
ret = gd_add_friend_to_dict(peerinfo, friends, key);
if (ret)
- goto out;
+ goto unlock;
}
ret = dict_set_int32n(friends, "count", SLEN("count"), count);
- if (ret)
- goto out;
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=count", NULL);
+ goto unlock;
+ }
ret = dict_set_static_ptr(friends, "peerinfo", cur_peerinfo);
if (ret) {
+ RCU_READ_UNLOCK;
gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
"failed to set peerinfo");
goto out;
@@ -693,8 +751,9 @@ glusterd_ac_update_friend(glusterd_friend_sm_event_t *event, void *ctx)
gf_msg_debug(this->name, 0, "Returning with %d", ret);
+unlock:
+ RCU_READ_UNLOCK;
out:
- rcu_read_unlock();
if (friends)
dict_unref(friends);
@@ -738,13 +797,13 @@ glusterd_peer_detach_cleanup(glusterd_conf_t *priv)
}
}
- if (volinfo->type == GF_CLUSTER_TYPE_TIER) {
- svc = &(volinfo->tierd.svc);
+ if (glusterd_is_shd_compatible_volume(volinfo)) {
+ svc = &(volinfo->shd.svc);
ret = svc->stop(svc, SIGTERM);
if (ret) {
gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_SVC_STOP_FAIL,
"Failed "
- "to stop tierd daemon service");
+ "to stop shd daemon service");
}
}
@@ -775,7 +834,7 @@ glusterd_peer_detach_cleanup(glusterd_conf_t *priv)
}
/*Reconfigure all daemon services upon peer detach*/
- ret = glusterd_svcs_reconfigure();
+ ret = glusterd_svcs_reconfigure(NULL);
if (ret) {
gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_SVC_STOP_FAIL,
"Failed to reconfigure all daemon services.");
@@ -801,13 +860,13 @@ glusterd_ac_handle_friend_remove_req(glusterd_friend_sm_event_t *event,
ret = glusterd_xfer_friend_remove_resp(ev_ctx->req, ev_ctx->hostname,
ev_ctx->port);
- rcu_read_lock();
+ RCU_READ_LOCK;
cds_list_for_each_entry_rcu(peerinfo, &priv->peers, uuid_list)
{
ret = glusterd_friend_sm_new_event(GD_FRIEND_EVENT_REMOVE_FRIEND,
&new_event);
if (ret) {
- rcu_read_unlock();
+ RCU_READ_UNLOCK;
goto out;
}
@@ -816,13 +875,13 @@ glusterd_ac_handle_friend_remove_req(glusterd_friend_sm_event_t *event,
ret = glusterd_friend_sm_inject_event(new_event);
if (ret) {
- rcu_read_unlock();
+ RCU_READ_UNLOCK;
goto out;
}
new_event = NULL;
}
- rcu_read_unlock();
+ RCU_READ_UNLOCK;
glusterd_peer_detach_cleanup(priv);
out:
@@ -842,22 +901,22 @@ glusterd_ac_friend_remove(glusterd_friend_sm_event_t *event, void *ctx)
GF_ASSERT(event);
- rcu_read_lock();
+ RCU_READ_LOCK;
peerinfo = glusterd_peerinfo_find(event->peerid, event->peername);
if (!peerinfo) {
+ RCU_READ_UNLOCK;
gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_PEER_NOT_FOUND,
"Could not find peer %s(%s)", event->peername,
uuid_utoa(event->peerid));
- rcu_read_unlock();
goto out;
}
ret = glusterd_friend_remove_cleanup_vols(peerinfo->uuid);
+ RCU_READ_UNLOCK;
if (ret)
gf_msg(THIS->name, GF_LOG_WARNING, 0, GD_MSG_VOL_CLEANUP_FAIL,
"Volumes cleanup failed");
- rcu_read_unlock();
/* Exiting read critical section as glusterd_peerinfo_cleanup calls
* synchronize_rcu before freeing the peerinfo
*/
@@ -905,14 +964,14 @@ glusterd_ac_handle_friend_add_req(glusterd_friend_sm_event_t *event, void *ctx)
ev_ctx = ctx;
gf_uuid_copy(uuid, ev_ctx->uuid);
- rcu_read_lock();
+ RCU_READ_LOCK;
peerinfo = glusterd_peerinfo_find(event->peerid, event->peername);
if (!peerinfo) {
+ RCU_READ_UNLOCK;
+ ret = -1;
gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_PEER_NOT_FOUND,
"Could not find peer %s(%s)", event->peername,
uuid_utoa(event->peerid));
- ret = -1;
- rcu_read_unlock();
goto out;
}
@@ -922,7 +981,7 @@ glusterd_ac_handle_friend_add_req(glusterd_friend_sm_event_t *event, void *ctx)
*/
gf_uuid_copy(peerinfo->uuid, ev_ctx->uuid);
- rcu_read_unlock();
+ RCU_READ_UNLOCK;
conf = this->private;
GF_ASSERT(conf);
@@ -1047,9 +1106,10 @@ glusterd_friend_sm_transition_state(uuid_t peerid, char *peername,
GF_ASSERT(state);
GF_ASSERT(peername);
- rcu_read_lock();
+ RCU_READ_LOCK;
peerinfo = glusterd_peerinfo_find(peerid, peername);
if (!peerinfo) {
+ gf_smsg(THIS->name, GF_LOG_ERROR, errno, GD_MSG_PEER_NOT_FOUND, NULL);
goto out;
}
@@ -1061,7 +1121,7 @@ glusterd_friend_sm_transition_state(uuid_t peerid, char *peername,
ret = 0;
out:
- rcu_read_unlock();
+ RCU_READ_UNLOCK;
return ret;
}
@@ -1413,25 +1473,24 @@ glusterd_friend_sm()
cds_list_del_init(&event->list);
event_type = event->event;
- rcu_read_lock();
+ RCU_READ_LOCK;
peerinfo = glusterd_peerinfo_find(event->peerid, event->peername);
if (!peerinfo) {
+ RCU_READ_UNLOCK;
gf_msg("glusterd", GF_LOG_CRITICAL, 0, GD_MSG_PEER_NOT_FOUND,
"Received"
" event %s with empty peer info",
glusterd_friend_sm_event_name_get(event_type));
GF_FREE(event);
- rcu_read_unlock();
continue;
}
+ old_state = peerinfo->state.state;
+ RCU_READ_UNLOCK;
gf_msg_debug("glusterd", 0, "Dequeued event of type: '%s'",
glusterd_friend_sm_event_name_get(event_type));
- old_state = peerinfo->state.state;
-
- rcu_read_unlock();
/* Giving up read-critical section here as we only need
* the current state to call the handler.
*
@@ -1489,10 +1548,10 @@ glusterd_friend_sm()
/* We need to obtain peerinfo reference once again as we
* had exited the read critical section above.
*/
- rcu_read_lock();
+ RCU_READ_LOCK;
peerinfo = glusterd_peerinfo_find(event->peerid, event->peername);
if (!peerinfo) {
- rcu_read_unlock();
+ RCU_READ_UNLOCK;
/* A peer can only be deleted as an effect of
* this state machine, and two such state
* machines can never run at the same time.
@@ -1514,11 +1573,11 @@ glusterd_friend_sm()
}
ret = glusterd_store_peerinfo(peerinfo);
+ RCU_READ_UNLOCK;
if (ret) {
gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_PEERINFO_CREATE_FAIL,
"Failed to store peerinfo");
}
- rcu_read_unlock();
glusterd_destroy_friend_event_context(event);
GF_FREE(event);
diff --git a/xlators/mgmt/glusterd/src/glusterd-sm.h b/xlators/mgmt/glusterd/src/glusterd-sm.h
index 051e83c675d..11cbd85b3e3 100644
--- a/xlators/mgmt/glusterd/src/glusterd-sm.h
+++ b/xlators/mgmt/glusterd/src/glusterd-sm.h
@@ -11,17 +11,14 @@
#define _GLUSTERD_SM_H_
#include <pthread.h>
-#include "compat-uuid.h"
+#include <glusterfs/compat-uuid.h>
#include "rpc-clnt.h"
-#include "glusterfs.h"
-#include "xlator.h"
-#include "logging.h"
-#include "call-stub.h"
-#include "byte-order.h"
-//#include "glusterd.h"
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/call-stub.h>
+#include <glusterfs/byte-order.h>
#include "rpcsvc.h"
-#include "store.h"
+#include <glusterfs/store.h>
#include "glusterd-rcu.h"
diff --git a/xlators/mgmt/glusterd/src/glusterd-snapd-svc.c b/xlators/mgmt/glusterd/src/glusterd-snapd-svc.c
index 7f5fa5f0240..d75f249b29e 100644
--- a/xlators/mgmt/glusterd/src/glusterd-snapd-svc.c
+++ b/xlators/mgmt/glusterd/src/glusterd-snapd-svc.c
@@ -8,8 +8,8 @@
cases as published by the Free Software Foundation.
*/
-#include "globals.h"
-#include "run.h"
+#include <glusterfs/globals.h>
+#include <glusterfs/run.h>
#include "glusterd-utils.h"
#include "glusterd-volgen.h"
#include "glusterd-messages.h"
@@ -20,14 +20,15 @@
#include "glusterd-snapd-svc.h"
#include "glusterd-snapd-svc-helper.h"
#include "glusterd-snapshot-utils.h"
-#include "syscall.h"
+#include <glusterfs/syscall.h>
char *snapd_svc_name = "snapd";
static void
glusterd_svc_build_snapd_logdir(char *logdir, char *volname, size_t len)
{
- snprintf(logdir, len, "%s/snaps/%s", DEFAULT_LOG_FILE_DIRECTORY, volname);
+ glusterd_conf_t *priv = THIS->private;
+ snprintf(logdir, len, "%s/snaps/%s", priv->logdir, volname);
}
static void
@@ -86,8 +87,10 @@ glusterd_snapdsvc_init(void *data)
svc = &(volinfo->snapd.svc);
ret = snprintf(svc->name, sizeof(svc->name), "%s", snapd_svc_name);
- if (ret < 0)
+ if (ret < 0) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_COPY_FAIL, NULL);
goto out;
+ }
notify = glusterd_snapdsvc_rpc_notify;
@@ -114,6 +117,7 @@ glusterd_snapdsvc_init(void *data)
glusterd_svc_build_snapd_logfile(logfile, logdir, sizeof(logfile));
len = snprintf(volfileid, sizeof(volfileid), "snapd/%s", volinfo->volname);
if ((len < 0) || (len >= sizeof(volfileid))) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_COPY_FAIL, NULL);
ret = -1;
goto out;
}
@@ -300,16 +304,22 @@ glusterd_snapdsvc_start(glusterd_svc_t *svc, int flags)
}
runinit(&runner);
- if (this->ctx->cmd_args.valgrind) {
+ if (this->ctx->cmd_args.vgtool != _gf_none) {
len = snprintf(valgrind_logfile, PATH_MAX, "%s/valgrind-snapd.log",
svc->proc.logdir);
if ((len < 0) || (len >= PATH_MAX)) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_COPY_FAIL, NULL);
ret = -1;
goto out;
}
- runner_add_args(&runner, "valgrind", "--leak-check=full",
- "--trace-children=yes", "--track-origins=yes", NULL);
+ if (this->ctx->cmd_args.vgtool == _gf_memcheck)
+ runner_add_args(&runner, "valgrind", "--leak-check=full",
+ "--trace-children=yes", "--track-origins=yes",
+ NULL);
+ else
+ runner_add_args(&runner, "valgrind", "--tool=drd", NULL);
+
runner_argprintf(&runner, "--log-file=%s", valgrind_logfile);
}
@@ -366,6 +376,7 @@ int
glusterd_snapdsvc_restart()
{
glusterd_volinfo_t *volinfo = NULL;
+ glusterd_volinfo_t *tmp = NULL;
int ret = 0;
xlator_t *this = THIS;
glusterd_conf_t *conf = NULL;
@@ -376,7 +387,7 @@ glusterd_snapdsvc_restart()
conf = this->private;
GF_ASSERT(conf);
- cds_list_for_each_entry(volinfo, &conf->volumes, vol_list)
+ cds_list_for_each_entry_safe(volinfo, tmp, &conf->volumes, vol_list)
{
/* Start per volume snapd svc */
if (volinfo->status == GLUSTERD_STATUS_STARTED) {
diff --git a/xlators/mgmt/glusterd/src/glusterd-snapd-svc.h b/xlators/mgmt/glusterd/src/glusterd-snapd-svc.h
index c95e4cc7661..e15dbf54315 100644
--- a/xlators/mgmt/glusterd/src/glusterd-snapd-svc.h
+++ b/xlators/mgmt/glusterd/src/glusterd-snapd-svc.h
@@ -17,8 +17,8 @@ typedef struct glusterd_snapdsvc_ glusterd_snapdsvc_t;
struct glusterd_snapdsvc_ {
glusterd_svc_t svc;
- int port;
gf_store_handle_t *handle;
+ int port;
};
void
diff --git a/xlators/mgmt/glusterd/src/glusterd-snapshot-utils.c b/xlators/mgmt/glusterd/src/glusterd-snapshot-utils.c
index 5a9e198454d..995268b796d 100644
--- a/xlators/mgmt/glusterd/src/glusterd-snapshot-utils.c
+++ b/xlators/mgmt/glusterd/src/glusterd-snapshot-utils.c
@@ -16,8 +16,8 @@
#endif
#include <dlfcn.h>
-#include "dict.h"
-#include "syscall.h"
+#include <glusterfs/dict.h>
+#include <glusterfs/syscall.h>
#include "glusterd-op-sm.h"
#include "glusterd-utils.h"
#include "glusterd-messages.h"
@@ -200,7 +200,7 @@ glusterd_snap_volinfo_restore(dict_t *dict, dict_t *rsp_dict,
int32_t volcount)
{
char *value = NULL;
- char key[PATH_MAX] = "";
+ char key[64] = "";
int32_t brick_count = -1;
int32_t ret = -1;
xlator_t *this = NULL;
@@ -282,12 +282,10 @@ glusterd_snap_volinfo_restore(dict_t *dict, dict_t *rsp_dict,
new_volinfo->volume_id,
sizeof(new_volinfo->volume_id), XATTR_REPLACE);
if (ret == -1) {
- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SETXATTR_FAIL,
- "Failed to "
- "set extended attribute %s on %s. "
- "Reason: %s, snap: %s",
- GF_XATTR_VOL_ID_KEY, new_brickinfo->path,
- strerror(errno), new_volinfo->volname);
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_SET_XATTR_FAIL,
+ "Attribute=%s, Path=%s, Reason=%s, Snap=%s",
+ GF_XATTR_VOL_ID_KEY, new_brickinfo->path,
+ strerror(errno), new_volinfo->volname, NULL);
goto out;
}
}
@@ -1961,9 +1959,7 @@ glusterd_update_snaps_synctask(void *opaque)
synclock_lock(&conf->big_lock);
while (conf->restart_bricks) {
- synclock_unlock(&conf->big_lock);
- sleep(2);
- synclock_lock(&conf->big_lock);
+ synccond_wait(&conf->cond_restart_bricks, &conf->big_lock);
}
conf->restart_bricks = _gf_true;
@@ -2041,8 +2037,9 @@ glusterd_update_snaps_synctask(void *opaque)
"Failed to remove snap %s", snap->snapname);
goto out;
}
- if (dict)
- dict_unref(dict);
+
+ dict_unref(dict);
+ dict = NULL;
}
snprintf(buf, sizeof(buf), "%s.accept_peer_data", prefix);
ret = dict_get_int32(peer_data, buf, &val);
@@ -2070,6 +2067,7 @@ out:
if (dict)
dict_unref(dict);
conf->restart_bricks = _gf_false;
+ synccond_broadcast(&conf->cond_restart_bricks);
return ret;
}
@@ -2099,6 +2097,9 @@ glusterd_compare_friend_snapshots(dict_t *peer_data, char *peername,
goto out;
}
+ if (!snap_count)
+ goto out;
+
for (i = 1; i <= snap_count; i++) {
/* Compare one snapshot from peer_data at a time */
ret = glusterd_compare_snap(peer_data, i, peername, peerid);
@@ -2146,18 +2147,27 @@ glusterd_add_snapd_to_dict(glusterd_volinfo_t *volinfo, dict_t *dict,
snprintf(base_key, sizeof(base_key), "brick%d", count);
snprintf(key, sizeof(key), "%s.hostname", base_key);
ret = dict_set_str(dict, key, "Snapshot Daemon");
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, "Key=%s",
+ key, NULL);
goto out;
+ }
snprintf(key, sizeof(key), "%s.path", base_key);
ret = dict_set_dynstr(dict, key, gf_strdup(uuid_utoa(MY_UUID)));
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, "Key=%s",
+ key, NULL);
goto out;
+ }
snprintf(key, sizeof(key), "%s.port", base_key);
ret = dict_set_int32(dict, key, volinfo->snapd.port);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, "Key=%s",
+ key, NULL);
goto out;
+ }
glusterd_svc_build_snapd_pidfile(volinfo, pidfile, sizeof(pidfile));
@@ -2167,8 +2177,11 @@ glusterd_add_snapd_to_dict(glusterd_volinfo_t *volinfo, dict_t *dict,
snprintf(key, sizeof(key), "%s.pid", base_key);
ret = dict_set_int32(dict, key, pid);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, "Key=%s",
+ key, NULL);
goto out;
+ }
snprintf(key, sizeof(key), "%s.status", base_key);
ret = dict_set_int32(dict, key, brick_online);
@@ -2669,8 +2682,10 @@ glusterd_missed_snapinfo_new(glusterd_missed_snap_info **missed_snapinfo)
new_missed_snapinfo = GF_CALLOC(1, sizeof(*new_missed_snapinfo),
gf_gld_mt_missed_snapinfo_t);
- if (!new_missed_snapinfo)
+ if (!new_missed_snapinfo) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_NO_MEMORY, NULL);
goto out;
+ }
CDS_INIT_LIST_HEAD(&new_missed_snapinfo->missed_snaps);
CDS_INIT_LIST_HEAD(&new_missed_snapinfo->snap_ops);
@@ -2698,8 +2713,10 @@ glusterd_missed_snap_op_new(glusterd_snap_op_t **snap_op)
new_snap_op = GF_CALLOC(1, sizeof(*new_snap_op),
gf_gld_mt_missed_snapinfo_t);
- if (!new_snap_op)
+ if (!new_snap_op) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_NO_MEMORY, NULL);
goto out;
+ }
new_snap_op->brick_num = -1;
new_snap_op->op = -1;
@@ -2851,19 +2868,21 @@ out:
return quorum_met;
}
-int32_t
+static int32_t
glusterd_volume_quorum_check(glusterd_volinfo_t *volinfo, int64_t index,
- dict_t *dict, char *key_prefix, int8_t snap_force,
- int quorum_count, char *quorum_type,
- char **op_errstr, uint32_t *op_errno)
+ dict_t *dict, const char *key_prefix,
+ int8_t snap_force, int quorum_count,
+ char *quorum_type, char **op_errstr,
+ uint32_t *op_errno)
{
int ret = 0;
xlator_t *this = NULL;
int64_t i = 0;
int64_t j = 0;
- char key[1024] = {
+ char key[128] = {
0,
- };
+ }; /* key_prefix is passed from above, but is really quite small */
+ int keylen;
int down_count = 0;
gf_boolean_t first_brick_on = _gf_true;
glusterd_conf_t *priv = NULL;
@@ -2892,9 +2911,10 @@ glusterd_volume_quorum_check(glusterd_volinfo_t *volinfo, int64_t index,
with replica count 2, quorum is not met if even
one of its subvolumes is down
*/
- snprintf(key, sizeof(key), "%s%" PRId64 ".brick%" PRId64 ".status",
- key_prefix, index, i);
- ret = dict_get_int32(dict, key, &brick_online);
+ keylen = snprintf(key, sizeof(key),
+ "%s%" PRId64 ".brick%" PRId64 ".status",
+ key_prefix, index, i);
+ ret = dict_get_int32n(dict, key, keylen, &brick_online);
if (ret || !brick_online) {
ret = 1;
gf_msg(this->name, GF_LOG_ERROR, 0,
@@ -2917,10 +2937,10 @@ glusterd_volume_quorum_check(glusterd_volinfo_t *volinfo, int64_t index,
ret = 1;
quorum_met = _gf_false;
for (i = 0; i < volinfo->dist_leaf_count; i++) {
- snprintf(key, sizeof(key),
- "%s%" PRId64 ".brick%" PRId64 ".status", key_prefix,
- index, (j * volinfo->dist_leaf_count) + i);
- ret = dict_get_int32(dict, key, &brick_online);
+ keylen = snprintf(
+ key, sizeof(key), "%s%" PRId64 ".brick%" PRId64 ".status",
+ key_prefix, index, (j * volinfo->dist_leaf_count) + i);
+ ret = dict_get_int32n(dict, key, keylen, &brick_online);
if (ret || !brick_online) {
if (i == 0)
first_brick_on = _gf_false;
@@ -2951,9 +2971,9 @@ out:
return ret;
}
-int32_t
+static int32_t
glusterd_snap_common_quorum_calculate(glusterd_volinfo_t *volinfo, dict_t *dict,
- int64_t index, char *key_prefix,
+ int64_t index, const char *key_prefix,
int8_t snap_force,
gf_boolean_t snap_volume,
char **op_errstr, uint32_t *op_errno)
@@ -3002,9 +3022,10 @@ glusterd_snap_common_quorum_calculate(glusterd_volinfo_t *volinfo, dict_t *dict,
quorum_count = volinfo->brick_count;
}
- ret = dict_get_str(volinfo->dict, "cluster.quorum-type", &quorum_type);
+ ret = dict_get_str_sizen(volinfo->dict, "cluster.quorum-type",
+ &quorum_type);
if (!ret && !strcmp(quorum_type, "fixed")) {
- ret = dict_get_int32(volinfo->dict, "cluster.quorum-count", &tmp);
+ ret = dict_get_int32_sizen(volinfo->dict, "cluster.quorum-count", &tmp);
/* if quorum-type option is not found in the
dict assume auto quorum type. i.e n/2 + 1.
The same assumption is made when quorum-count
@@ -3046,12 +3067,12 @@ out:
return ret;
}
-int32_t
+static int32_t
glusterd_snap_quorum_check_for_clone(dict_t *dict, gf_boolean_t snap_volume,
char **op_errstr, uint32_t *op_errno)
{
const char err_str[] = "glusterds are not in quorum";
- char key_prefix[PATH_MAX] = {
+ char key_prefix[16] = {
0,
};
char *snapname = NULL;
@@ -3060,9 +3081,6 @@ glusterd_snap_quorum_check_for_clone(dict_t *dict, gf_boolean_t snap_volume,
glusterd_volinfo_t *tmp_volinfo = NULL;
char *volname = NULL;
int64_t volcount = 0;
- char key[PATH_MAX] = {
- 0,
- };
int64_t i = 0;
int32_t ret = -1;
xlator_t *this = NULL;
@@ -3077,7 +3095,7 @@ glusterd_snap_quorum_check_for_clone(dict_t *dict, gf_boolean_t snap_volume,
}
if (snap_volume) {
- ret = dict_get_str(dict, "snapname", &snapname);
+ ret = dict_get_str_sizen(dict, "snapname", &snapname);
if (ret) {
gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
"failed to "
@@ -3119,9 +3137,7 @@ glusterd_snap_quorum_check_for_clone(dict_t *dict, gf_boolean_t snap_volume,
}
for (i = 1; i <= volcount; i++) {
- snprintf(key, sizeof(key), "%s%" PRId64,
- snap_volume ? "snap-volname" : "volname", i);
- ret = dict_get_str(dict, "clonename", &volname);
+ ret = dict_get_str_sizen(dict, "clonename", &volname);
if (ret) {
gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
"failed to "
@@ -3168,14 +3184,14 @@ out:
return ret;
}
-int32_t
+static int32_t
glusterd_snap_quorum_check_for_create(dict_t *dict, gf_boolean_t snap_volume,
char **op_errstr, uint32_t *op_errno)
{
int8_t snap_force = 0;
int32_t force = 0;
const char err_str[] = "glusterds are not in quorum";
- char key_prefix[PATH_MAX] = {
+ char key_prefix[16] = {
0,
};
char *snapname = NULL;
@@ -3183,7 +3199,7 @@ glusterd_snap_quorum_check_for_create(dict_t *dict, gf_boolean_t snap_volume,
glusterd_volinfo_t *volinfo = NULL;
char *volname = NULL;
int64_t volcount = 0;
- char key[PATH_MAX] = {
+ char key[32] = {
0,
};
int64_t i = 0;
@@ -3310,7 +3326,7 @@ glusterd_snap_quorum_check(dict_t *dict, gf_boolean_t snap_volume,
goto out;
}
- ret = dict_get_int32(dict, "type", &snap_command);
+ ret = dict_get_int32_sizen(dict, "type", &snap_command);
if (ret) {
gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
"unable to get the type of "
@@ -3364,6 +3380,25 @@ out:
return ret;
}
+int
+glusterd_is_path_mounted(const char *path)
+{
+ FILE *mtab = NULL;
+ struct mntent *part = NULL;
+ int is_mounted = 0;
+
+ if ((mtab = setmntent("/etc/mtab", "r")) != NULL) {
+ while ((part = getmntent(mtab)) != NULL) {
+ if ((part->mnt_fsname != NULL) &&
+ (strcmp(part->mnt_dir, path)) == 0) {
+ is_mounted = 1;
+ break;
+ }
+ }
+ endmntent(mtab);
+ }
+ return is_mounted;
+}
/* This function will do unmount for snaps.
*/
int32_t
@@ -3388,14 +3423,11 @@ glusterd_snap_unmount(xlator_t *this, glusterd_volinfo_t *volinfo)
continue;
}
- /* Fetch the brick mount path from the brickinfo->path */
- ret = glusterd_get_brick_root(brickinfo->path, &brick_mount_path);
+ ret = glusterd_find_brick_mount_path(brickinfo->path,
+ &brick_mount_path);
if (ret) {
- gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_BRICK_PATH_UNMOUNTED,
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BRK_MNTPATH_GET_FAIL,
"Failed to find brick_mount_path for %s", brickinfo->path);
- /* There is chance that brick path is already
- * unmounted. */
- ret = 0;
goto out;
}
/* unmount cannot be done when the brick process is still in
@@ -3440,6 +3472,10 @@ glusterd_umount(const char *path)
GF_ASSERT(this);
GF_ASSERT(path);
+ if (!glusterd_is_path_mounted(path)) {
+ return 0;
+ }
+
runinit(&runner);
snprintf(msg, sizeof(msg), "umount path %s", path);
runner_add_args(&runner, _PATH_UMOUNT, "-f", path, NULL);
@@ -3515,9 +3551,9 @@ glusterd_copy_file(const char *source, const char *destination)
ret = sys_write(dest_fd, buffer, read_len);
if (ret != read_len) {
gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_FILE_OP_FAILED,
- "Error writing in "
- "file %s",
- destination);
+ "Writing in "
+ "file %s failed with error %s",
+ destination, strerror(errno));
goto out;
}
} while (ret > 0);
@@ -3572,13 +3608,17 @@ glusterd_copy_folder(const char *source, const char *destination)
continue;
ret = snprintf(src_path, sizeof(src_path), "%s/%s", source,
entry->d_name);
- if (ret < 0)
+ if (ret < 0) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_COPY_FAIL, NULL);
goto out;
+ }
ret = snprintf(dest_path, sizeof(dest_path), "%s/%s", destination,
entry->d_name);
- if (ret < 0)
+ if (ret < 0) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_COPY_FAIL, NULL);
goto out;
+ }
ret = glusterd_copy_file(src_path, dest_path);
if (ret) {
@@ -3734,8 +3774,10 @@ glusterd_copy_quota_files(glusterd_volinfo_t *src_vol,
GLUSTERD_GET_VOLUME_DIR(dest_dir, dest_vol, priv);
ret = snprintf(src_path, sizeof(src_path), "%s/quota.conf", src_dir);
- if (ret < 0)
+ if (ret < 0) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_COPY_FAIL, NULL);
goto out;
+ }
/* quota.conf is not present if quota is not enabled, Hence ignoring
* the absence of this file
@@ -3748,8 +3790,10 @@ glusterd_copy_quota_files(glusterd_volinfo_t *src_vol,
}
ret = snprintf(dest_path, sizeof(dest_path), "%s/quota.conf", dest_dir);
- if (ret < 0)
+ if (ret < 0) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_COPY_FAIL, NULL);
goto out;
+ }
ret = glusterd_copy_file(src_path, dest_path);
if (ret) {
@@ -3773,8 +3817,10 @@ glusterd_copy_quota_files(glusterd_volinfo_t *src_vol,
}
ret = snprintf(dest_path, sizeof(dest_path), "%s/quota.cksum", dest_dir);
- if (ret < 0)
+ if (ret < 0) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_COPY_FAIL, NULL);
goto out;
+ }
ret = glusterd_copy_file(src_path, dest_path);
if (ret) {
@@ -3788,6 +3834,148 @@ out:
return ret;
}
+/* *
+ * Here there are two possibilities: the destination is either a snapshot
+ * or a clone. In the case of a snapshot, the nfs_ganesha export file will
+ * be copied to snapdir. If it is a clone, then a new export file will be
+ * created for the clone in the GANESHA_EXPORT_DIRECTORY, replacing
+ * occurrences of volname with clonename.
+ */
+int
+glusterd_copy_nfs_ganesha_file(glusterd_volinfo_t *src_vol,
+ glusterd_volinfo_t *dest_vol)
+{
+ int32_t ret = -1;
+ char snap_dir[PATH_MAX] = {
+ 0,
+ };
+ char src_path[PATH_MAX] = {
+ 0,
+ };
+ char dest_path[PATH_MAX] = {
+ 0,
+ };
+ char buffer[BUFSIZ] = {
+ 0,
+ };
+ char *find_ptr = NULL;
+ char *buff_ptr = NULL;
+ char *tmp_ptr = NULL;
+ xlator_t *this = NULL;
+ glusterd_conf_t *priv = NULL;
+ struct stat stbuf = {
+ 0,
+ };
+ FILE *src = NULL;
+ FILE *dest = NULL;
+
+ this = THIS;
+ GF_VALIDATE_OR_GOTO("snapshot", this, out);
+ priv = this->private;
+ GF_VALIDATE_OR_GOTO(this->name, priv, out);
+
+ GF_VALIDATE_OR_GOTO(this->name, src_vol, out);
+ GF_VALIDATE_OR_GOTO(this->name, dest_vol, out);
+
+ if (glusterd_check_ganesha_export(src_vol) == _gf_false) {
+ gf_msg_debug(this->name, 0,
+ "%s is not exported via "
+ "NFS-Ganesha. Skipping copy of export conf.",
+ src_vol->volname);
+ ret = 0;
+ goto out;
+ }
+
+ if (src_vol->is_snap_volume) {
+ GLUSTERD_GET_SNAP_DIR(snap_dir, src_vol->snapshot, priv);
+ ret = snprintf(src_path, PATH_MAX, "%s/export.%s.conf", snap_dir,
+ src_vol->snapshot->snapname);
+ } else {
+ ret = snprintf(src_path, PATH_MAX, "%s/export.%s.conf",
+ GANESHA_EXPORT_DIRECTORY, src_vol->volname);
+ }
+ if (ret < 0 || ret >= PATH_MAX)
+ goto out;
+
+ ret = sys_lstat(src_path, &stbuf);
+ if (ret) {
+ /*
+ * This code path is hit, only when the src_vol is being *
+ * exported via NFS-Ganesha. So if the conf file is not *
+ * available, we fail the snapshot operation. *
+ */
+ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_FILE_OP_FAILED,
+ "Stat on %s failed with %s", src_path, strerror(errno));
+ goto out;
+ }
+
+ if (dest_vol->is_snap_volume) {
+ memset(snap_dir, 0, PATH_MAX);
+ GLUSTERD_GET_SNAP_DIR(snap_dir, dest_vol->snapshot, priv);
+ ret = snprintf(dest_path, sizeof(dest_path), "%s/export.%s.conf",
+ snap_dir, dest_vol->snapshot->snapname);
+ if (ret < 0)
+ goto out;
+
+ ret = glusterd_copy_file(src_path, dest_path);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, GD_MSG_NO_MEMORY,
+ "Failed to copy %s in %s", src_path, dest_path);
+ goto out;
+ }
+
+ } else {
+ ret = snprintf(dest_path, sizeof(dest_path), "%s/export.%s.conf",
+ GANESHA_EXPORT_DIRECTORY, dest_vol->volname);
+ if (ret < 0)
+ goto out;
+
+ src = fopen(src_path, "r");
+ dest = fopen(dest_path, "w");
+
+ if (!src || !dest) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_FILE_OP_FAILED,
+ "Failed to open %s", dest ? src_path : dest_path);
+ ret = -1;
+ goto out;
+ }
+
+ /* *
+ * if the source volume is a snapshot, the export conf file
+ * contains the original volname
+ */
+ if (src_vol->is_snap_volume)
+ find_ptr = gf_strdup(src_vol->parent_volname);
+ else
+ find_ptr = gf_strdup(src_vol->volname);
+
+ if (!find_ptr)
+ goto out;
+
+ /* Replacing volname with clonename */
+ while (fgets(buffer, BUFSIZ, src)) {
+ buff_ptr = buffer;
+ while ((tmp_ptr = strstr(buff_ptr, find_ptr))) {
+ while (buff_ptr < tmp_ptr)
+ fputc((int)*buff_ptr++, dest);
+ fputs(dest_vol->volname, dest);
+ buff_ptr += strlen(find_ptr);
+ }
+ fputs(buff_ptr, dest);
+ memset(buffer, 0, BUFSIZ);
+ }
+ }
+out:
+ if (src)
+ fclose(src);
+ if (dest)
+ fclose(dest);
+ if (find_ptr)
+ GF_FREE(find_ptr);
+
+ return ret;
+}
+
int32_t
glusterd_restore_geo_rep_files(glusterd_volinfo_t *snap_vol)
{
@@ -3798,7 +3986,7 @@ glusterd_restore_geo_rep_files(glusterd_volinfo_t *snap_vol)
char *origin_volname = NULL;
glusterd_volinfo_t *origin_vol = NULL;
int i = 0;
- char key[PATH_MAX] = "";
+ char key[32] = "";
char session[PATH_MAX] = "";
char slave[PATH_MAX] = "";
char snapgeo_dir[PATH_MAX] = "";
@@ -3876,6 +4064,64 @@ out:
return ret;
}
+/*
+ * glusterd_restore_nfs_ganesha_file - restore the NFS-Ganesha export file
+ * that was saved next to a snapshot.
+ *
+ * Copies "<snap dir>/export.<snap->snapname>.conf" to
+ * "GANESHA_EXPORT_DIRECTORY/export.<src_vol->volname>.conf".
+ *
+ * @src_vol: volume being restored; only its volname is read here.
+ * @snap:    snapshot whose directory holds the saved export file.
+ *
+ * Returns 0 when the snapshot has no export file (lstat reports ENOENT),
+ * otherwise the result of glusterd_copy_file(). Returns -1 when an argument
+ * is NULL, when a path cannot be built without truncation, or when lstat
+ * fails for any reason other than ENOENT.
+ */
+int
+glusterd_restore_nfs_ganesha_file(glusterd_volinfo_t *src_vol,
+                                  glusterd_snap_t *snap)
+{
+    int32_t ret = -1;
+    char snap_dir[PATH_MAX] = "";
+    char src_path[PATH_MAX] = "";
+    char dest_path[PATH_MAX] = "";
+    xlator_t *this = NULL;
+    glusterd_conf_t *priv = NULL;
+    struct stat stbuf = {
+        0,
+    };
+
+    this = THIS;
+    GF_VALIDATE_OR_GOTO("snapshot", this, out);
+    priv = this->private;
+    GF_VALIDATE_OR_GOTO(this->name, priv, out);
+
+    GF_VALIDATE_OR_GOTO(this->name, src_vol, out);
+    GF_VALIDATE_OR_GOTO(this->name, snap, out);
+
+    GLUSTERD_GET_SNAP_DIR(snap_dir, snap, priv);
+    /* The macro leaves an empty string behind when the path did not fit. */
+    if (snap_dir[0] == '\0') {
+        gf_smsg(this->name, GF_LOG_ERROR, ENAMETOOLONG, GD_MSG_COPY_FAIL,
+                NULL);
+        ret = -1;
+        goto out;
+    }
+
+    ret = snprintf(src_path, sizeof(src_path), "%s/export.%s.conf", snap_dir,
+                   snap->snapname);
+    /* A return value >= the buffer size means the path was truncated. */
+    if ((ret < 0) || ((size_t)ret >= sizeof(src_path))) {
+        gf_smsg(this->name, GF_LOG_ERROR, (ret < 0) ? errno : ENAMETOOLONG,
+                GD_MSG_COPY_FAIL, NULL);
+        ret = -1;
+        goto out;
+    }
+
+    ret = sys_lstat(src_path, &stbuf);
+    if (ret) {
+        if (errno == ENOENT) {
+            /* No export file was saved with this snapshot: not an error. */
+            ret = 0;
+            gf_msg_debug(this->name, 0, "%s not found", src_path);
+        } else {
+            gf_msg(this->name, GF_LOG_WARNING, errno, GD_MSG_FILE_OP_FAILED,
+                   "Stat on %s failed with %s", src_path, strerror(errno));
+        }
+        goto out;
+    }
+
+    ret = snprintf(dest_path, sizeof(dest_path), "%s/export.%s.conf",
+                   GANESHA_EXPORT_DIRECTORY, src_vol->volname);
+    if ((ret < 0) || ((size_t)ret >= sizeof(dest_path))) {
+        gf_smsg(this->name, GF_LOG_ERROR, (ret < 0) ? errno : ENAMETOOLONG,
+                GD_MSG_COPY_FAIL, NULL);
+        ret = -1;
+        goto out;
+    }
+
+    ret = glusterd_copy_file(src_path, dest_path);
+    if (ret) {
+        /*
+         * Report a file-operation failure rather than ENOMEM: nothing here
+         * shows that the copy failed for lack of memory. errno is not passed
+         * because it may no longer describe the failing call.
+         */
+        gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_FILE_OP_FAILED,
+               "Failed to copy %s in %s", src_path, dest_path);
+    }
+
+out:
+    return ret;
+}
+
/* Snapd functions */
int
glusterd_is_snapd_enabled(glusterd_volinfo_t *volinfo)
diff --git a/xlators/mgmt/glusterd/src/glusterd-snapshot-utils.h b/xlators/mgmt/glusterd/src/glusterd-snapshot-utils.h
index 19fedecee8d..5762999bba7 100644
--- a/xlators/mgmt/glusterd/src/glusterd-snapshot-utils.h
+++ b/xlators/mgmt/glusterd/src/glusterd-snapshot-utils.h
@@ -10,6 +10,16 @@
#ifndef _GLUSTERD_SNAP_UTILS_H
#define _GLUSTERD_SNAP_UTILS_H
+/*
+ * Build "<priv->workdir>/snaps/<snap->snapname>" into path.
+ *
+ * path must be a writable buffer of at least PATH_MAX bytes. If snprintf
+ * fails or the result does not fit in PATH_MAX bytes, path is set to the
+ * empty string, so callers can detect failure by testing path[0].
+ * Every parameter is parenthesized so that arguments such as "&conf" or
+ * "vol->snapshot" expand correctly.
+ */
+#define GLUSTERD_GET_SNAP_DIR(path, snap, priv) \
+    do { \
+        int32_t _snap_dir_len; \
+        _snap_dir_len = snprintf((path), PATH_MAX, "%s/snaps/%s", \
+                                 (priv)->workdir, (snap)->snapname); \
+        if ((_snap_dir_len < 0) || (_snap_dir_len >= PATH_MAX)) { \
+            (path)[0] = 0; \
+        } \
+    } while (0)
+
int32_t
glusterd_snap_volinfo_find(char *volname, glusterd_snap_t *snap,
glusterd_volinfo_t **volinfo);
diff --git a/xlators/mgmt/glusterd/src/glusterd-snapshot.c b/xlators/mgmt/glusterd/src/glusterd-snapshot.c
index 543d677f420..aeaa8d15214 100644
--- a/xlators/mgmt/glusterd/src/glusterd-snapshot.c
+++ b/xlators/mgmt/glusterd/src/glusterd-snapshot.c
@@ -35,18 +35,18 @@
#include <regex.h>
-#include "compat.h"
+#include <glusterfs/compat.h>
#include "protocol-common.h"
-#include "xlator.h"
-#include "logging.h"
-#include "timer.h"
+#include <glusterfs/xlator.h>
+#include <glusterfs/logging.h>
+#include <glusterfs/timer.h>
#include "glusterd-mem-types.h"
#include "glusterd.h"
#include "glusterd-sm.h"
#include "glusterd-op-sm.h"
#include "glusterd-utils.h"
#include "glusterd-store.h"
-#include "run.h"
+#include <glusterfs/run.h>
#include "glusterd-volgen.h"
#include "glusterd-mgmt.h"
#include "glusterd-syncop.h"
@@ -55,12 +55,27 @@
#include "glusterfs3.h"
-#include "syscall.h"
+#include <glusterfs/syscall.h>
#include "cli1-xdr.h"
#include "xdr-generic.h"
-#include "lvm-defaults.h"
-#include "events.h"
+#include <glusterfs/lvm-defaults.h>
+#include <glusterfs/events.h>
+
+/*
+ * Copy the string returned by uuid_utoa_r(uuid, ...) into ret_string,
+ * skipping every '-' character, and NUL-terminate the result.
+ *
+ * No bounds checking is done: ret_string must have room for every
+ * non-hyphen character plus the terminator.
+ * NOTE(review): uuid_utoa_r presumably renders the uuid in its canonical
+ * hyphenated text form into the scratch buffer - confirm against its
+ * definition.
+ */
+#define GLUSTERD_GET_UUID_NOHYPHEN(ret_string, uuid) \
+    do { \
+        char *snap_volname_ptr = ret_string; \
+        char tmp_uuid[64]; \
+        char *snap_volid_ptr; \
+        for (snap_volid_ptr = uuid_utoa_r(uuid, tmp_uuid); *snap_volid_ptr; \
+             snap_volid_ptr++) { \
+            if (*snap_volid_ptr != '-') { \
+                *snap_volname_ptr++ = *snap_volid_ptr; \
+            } \
+        } \
+        *snap_volname_ptr = '\0'; \
+    } while (0)
char snap_mount_dir[VALID_GLUSTERD_PATHMAX];
struct snap_create_args_ {
@@ -186,7 +201,7 @@ glusterd_find_missed_snap(dict_t *rsp_dict, glusterd_volinfo_t *vol,
continue;
}
- rcu_read_lock();
+ RCU_READ_LOCK;
cds_list_for_each_entry_rcu(peerinfo, peers, uuid_list)
{
if (gf_uuid_compare(peerinfo->uuid, brickinfo->uuid)) {
@@ -202,18 +217,18 @@ glusterd_find_missed_snap(dict_t *rsp_dict, glusterd_volinfo_t *vol,
ret = glusterd_add_missed_snaps_to_dict(
rsp_dict, vol, brickinfo, brick_count + 1, op);
if (ret) {
+ RCU_READ_UNLOCK;
gf_msg(this->name, GF_LOG_ERROR, 0,
GD_MSG_MISSED_SNAP_CREATE_FAIL,
"Failed to add missed snapshot "
"info for %s:%s in the "
"rsp_dict",
brickinfo->hostname, brickinfo->path);
- rcu_read_unlock();
goto out;
}
}
}
- rcu_read_unlock();
+ RCU_READ_UNLOCK;
brick_count++;
}
@@ -499,6 +514,7 @@ glusterd_copy_geo_rep_session_files(char *session, glusterd_volinfo_t *snap_vol)
ret = snprintf(georep_session_dir, sizeof(georep_session_dir), "%s/%s/%s",
priv->workdir, GEOREP, session);
if (ret < 0) { /* Negative value is an error */
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_COPY_FAIL, NULL);
goto out;
}
@@ -506,10 +522,11 @@ glusterd_copy_geo_rep_session_files(char *session, glusterd_volinfo_t *snap_vol)
priv->workdir, GLUSTERD_VOL_SNAP_DIR_PREFIX,
snap_vol->snapshot->snapname, GEOREP, session);
if (ret < 0) { /* Negative value is an error */
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_COPY_FAIL, NULL);
goto out;
}
- ret = mkdir_p(snap_session_dir, 0777, _gf_true);
+ ret = mkdir_p(snap_session_dir, 0755, _gf_true);
if (ret) {
gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DIR_OP_FAILED,
"Creating directory %s failed", snap_session_dir);
@@ -553,12 +570,14 @@ glusterd_copy_geo_rep_session_files(char *session, glusterd_volinfo_t *snap_vol)
ret = snprintf(src_path, sizeof(src_path), "%s/%s", georep_session_dir,
files[i]->d_name);
if (ret < 0) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_COPY_FAIL, NULL);
goto out;
}
ret = snprintf(dest_path, sizeof(dest_path), "%s/%s", snap_session_dir,
files[i]->d_name);
if (ret < 0) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_COPY_FAIL, NULL);
goto out;
}
@@ -617,17 +636,19 @@ glusterd_snapshot_backup_vol(glusterd_volinfo_t *volinfo)
"%s/" GLUSTERD_TRASH "/vols-%s.deleted", priv->workdir,
volinfo->volname);
if ((len < 0) || (len >= sizeof(delete_path))) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_COPY_FAIL, NULL);
goto out;
}
len = snprintf(trashdir, sizeof(trashdir), "%s/" GLUSTERD_TRASH,
priv->workdir);
- if ((len < 0) || (len >= sizeof(delete_path))) {
+ if ((len < 0) || (len >= sizeof(trashdir))) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_COPY_FAIL, NULL);
goto out;
}
/* Create trash folder if it is not there */
- ret = sys_mkdir(trashdir, 0777);
+ ret = sys_mkdir(trashdir, 0755);
if (ret && errno != EEXIST) {
gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DIR_OP_FAILED,
"Failed to create trash directory, reason : %s",
@@ -648,7 +669,7 @@ glusterd_snapshot_backup_vol(glusterd_volinfo_t *volinfo)
/* Re-create an empty origin volume folder so that restore can
* happen. */
- ret = sys_mkdir(pathname, 0777);
+ ret = sys_mkdir(pathname, 0755);
if (ret && errno != EEXIST) {
gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DIR_OP_FAILED,
"Failed to create origin "
@@ -689,14 +710,14 @@ out:
return op_ret;
}
-int32_t
+static int32_t
glusterd_copy_geo_rep_files(glusterd_volinfo_t *origin_vol,
glusterd_volinfo_t *snap_vol, dict_t *rsp_dict)
{
int32_t ret = -1;
int i = 0;
xlator_t *this = NULL;
- char key[PATH_MAX] = "";
+ char key[32] = "";
char session[PATH_MAX] = "";
char slave[PATH_MAX] = "";
char snapgeo_dir[PATH_MAX] = "";
@@ -715,13 +736,14 @@ glusterd_copy_geo_rep_files(glusterd_volinfo_t *origin_vol,
* is slave volume.
*/
if (!origin_vol->gsync_slaves) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_INVALID_SLAVE, NULL);
ret = 0;
goto out;
}
GLUSTERD_GET_SNAP_GEO_REP_DIR(snapgeo_dir, snap_vol->snapshot, priv);
- ret = sys_mkdir(snapgeo_dir, 0777);
+ ret = sys_mkdir(snapgeo_dir, 0755);
if (ret) {
gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DIR_OP_FAILED,
"Creating directory %s failed", snapgeo_dir);
@@ -1403,6 +1425,8 @@ glusterd_handle_snapshot_config(rpcsvc_request_t *req, glusterd_op_t op,
&config_command);
if (ret) {
snprintf(err_str, len, "Failed to get config-command type");
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Key=config-command", NULL);
goto out;
}
@@ -1905,133 +1929,6 @@ out:
}
int
-glusterd_snapshot_pause_tier(xlator_t *this, glusterd_volinfo_t *volinfo)
-{
- int ret = -1;
- dict_t *dict = NULL;
- char *op_errstr = NULL;
-
- GF_VALIDATE_OR_GOTO("glusterd", this, out);
- GF_VALIDATE_OR_GOTO(this->name, volinfo, out);
-
- if (volinfo->type != GF_CLUSTER_TYPE_TIER) {
- ret = 0;
- goto out;
- }
-
- dict = dict_new();
- if (!dict) {
- goto out;
- }
-
- ret = dict_set_int32n(dict, "rebalance-command", SLEN("rebalance-command"),
- GF_DEFRAG_CMD_PAUSE_TIER);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
- "Failed to set rebalance-command");
- goto out;
- }
-
- ret = dict_set_strn(dict, "volname", SLEN("volname"), volinfo->volname);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
- "Failed to set volname");
- goto out;
- }
-
- ret = gd_brick_op_phase(GD_OP_DEFRAG_BRICK_VOLUME, NULL, dict, &op_errstr);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAP_PAUSE_TIER_FAIL,
- "Failed to pause tier. Errstr=%s", op_errstr);
- goto out;
- }
-
-out:
- if (dict)
- dict_unref(dict);
-
- return ret;
-}
-
-int
-glusterd_snapshot_resume_tier(xlator_t *this, dict_t *snap_dict)
-{
- int ret = -1;
- dict_t *dict = NULL;
- int64_t volcount = 0;
- char key[64] = "";
- int keylen;
- char *volname = NULL;
- int i = 0;
- char *op_errstr = NULL;
- glusterd_volinfo_t *volinfo = NULL;
-
- GF_VALIDATE_OR_GOTO("glusterd", this, out);
- GF_VALIDATE_OR_GOTO(this->name, snap_dict, out);
-
- ret = dict_get_int64(snap_dict, "volcount", &volcount);
- if (ret) {
- goto out;
- }
- if (volcount <= 0) {
- ret = -1;
- goto out;
- }
-
- dict = dict_new();
- if (!dict)
- goto out;
-
- for (i = 1; i <= volcount; i++) {
- keylen = snprintf(key, sizeof(key), "volname%d", i);
- ret = dict_get_strn(snap_dict, key, keylen, &volname);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
- "Failed to get key %s", volname);
- goto out;
- }
-
- ret = glusterd_volinfo_find(volname, &volinfo);
- if (ret)
- goto out;
-
- if (volinfo->type != GF_CLUSTER_TYPE_TIER)
- continue;
-
- ret = dict_set_int32n(dict, "rebalance-command",
- SLEN("rebalance-command"),
- GF_DEFRAG_CMD_RESUME_TIER);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
- "Failed to set rebalance-command");
-
- goto out;
- }
-
- ret = dict_set_strn(dict, "volname", SLEN("volname"), volname);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
- "Failed to set volname");
- goto out;
- }
-
- ret = gd_brick_op_phase(GD_OP_DEFRAG_BRICK_VOLUME, NULL, dict,
- &op_errstr);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAP_RESUME_TIER_FAIL,
- "Failed to resume tier");
- goto out;
- }
- }
-
-out:
- if (dict)
- dict_unref(dict);
-
- return ret;
-}
-
-int
glusterd_snap_create_clone_common_prevalidate(
dict_t *rsp_dict, int flags, char *snapname, char *err_str,
char *snap_volname, int64_t volcount, glusterd_volinfo_t *volinfo,
@@ -2039,7 +1936,7 @@ glusterd_snap_create_clone_common_prevalidate(
{
char *device = NULL;
char *orig_device = NULL;
- char key[PATH_MAX] = "";
+ char key[128] = "";
int ret = -1;
int64_t i = 1;
int64_t brick_order = 0;
@@ -2088,6 +1985,13 @@ glusterd_snap_create_clone_common_prevalidate(
"command or use [force] option in "
"snapshot create to override this "
"behavior.");
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ GD_MSG_BRICK_NOT_RUNNING,
+ "Please run volume status command to see brick "
+ "status.Please start the stopped brick and then issue "
+ "snapshot create command or use 'force' option in "
+ "snapshot create to override this behavior.",
+ NULL);
} else {
snprintf(err_str, PATH_MAX,
"One or more bricks are not running. "
@@ -2096,6 +2000,12 @@ glusterd_snap_create_clone_common_prevalidate(
"Please start the stopped brick "
"and then issue snapshot clone "
"command ");
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ GD_MSG_BRICK_NOT_RUNNING,
+ "Please run snapshot status command to see brick "
+ "status. Please start the stopped brick and then issue "
+ "snapshot clone command.",
+ NULL);
}
*op_errno = EG_BRCKDWN;
ret = -1;
@@ -2111,6 +2021,10 @@ glusterd_snap_create_clone_common_prevalidate(
if (len < 0) {
strcpy(err_str, "<error>");
}
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ GD_MSG_BRK_MNTPATH_GET_FAIL,
+ "Brick_hostname=%s, Brick_path=%s", brickinfo->hostname,
+ brickinfo->path, NULL);
ret = -1;
goto out;
}
@@ -2122,6 +2036,11 @@ glusterd_snap_create_clone_common_prevalidate(
"all bricks of %s are thinly "
"provisioned LV.",
volinfo->volname);
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ GD_MSG_SNAPSHOT_NOT_THIN_PROVISIONED,
+ "Ensure that all bricks of volume are thinly "
+ "provisioned LV, Volume=%s",
+ volinfo->volname, NULL);
ret = -1;
goto out;
}
@@ -2134,6 +2053,9 @@ glusterd_snap_create_clone_common_prevalidate(
"cannot copy the snapshot device "
"name (volname: %s, snapname: %s)",
volinfo->volname, snapname);
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ GD_MSG_SNAP_DEVICE_NAME_GET_FAIL, "Volname=%s, Snapname=%s",
+ volinfo->volname, snapname, NULL);
*loglevel = GF_LOG_WARNING;
ret = -1;
goto out;
@@ -2238,7 +2160,6 @@ glusterd_snapshot_clone_prevalidate(dict_t *dict, char **op_errstr,
char *clonename = NULL;
char *snapname = NULL;
char device_name[64] = "";
- char key[PATH_MAX] = "";
glusterd_snap_t *snap = NULL;
char err_str[PATH_MAX] = "";
int ret = -1;
@@ -2247,6 +2168,7 @@ glusterd_snapshot_clone_prevalidate(dict_t *dict, char **op_errstr,
xlator_t *this = NULL;
uuid_t *snap_volid = NULL;
gf_loglevel_t loglevel = GF_LOG_ERROR;
+ glusterd_volinfo_t *volinfo = NULL;
this = THIS;
GF_ASSERT(op_errstr);
@@ -2267,7 +2189,8 @@ glusterd_snapshot_clone_prevalidate(dict_t *dict, char **op_errstr,
goto out;
}
- if (glusterd_check_volume_exists(clonename)) {
+ ret = glusterd_volinfo_find(clonename, &volinfo);
+ if (!ret) {
ret = -1;
snprintf(err_str, sizeof(err_str),
"Volume with name:%s "
@@ -2299,8 +2222,17 @@ glusterd_snapshot_clone_prevalidate(dict_t *dict, char **op_errstr,
goto out;
}
- snprintf(key, sizeof(key) - 1, "vol1_volid");
- ret = dict_get_bin(dict, key, (void **)&snap_volid);
+ if (!glusterd_is_volume_started(snap_vol)) {
+ snprintf(err_str, sizeof(err_str),
+ "Snapshot %s is "
+ "not activated",
+ snap->snapname);
+ loglevel = GF_LOG_WARNING;
+ *op_errno = EG_VOLSTP;
+ goto out;
+ }
+
+ ret = dict_get_bin(dict, "vol1_volid", (void **)&snap_volid);
if (ret) {
gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
"Unable to fetch snap_volid");
@@ -2523,13 +2455,6 @@ glusterd_snapshot_create_prevalidate(dict_t *dict, char **op_errstr,
"Failed to pre validate");
goto out;
}
-
- ret = glusterd_snapshot_pause_tier(this, volinfo);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAP_PAUSE_TIER_FAIL,
- "Failed to pause tier in snap prevalidate.");
- goto out;
- }
}
ret = dict_set_int64(rsp_dict, "volcount", volcount);
@@ -3156,11 +3081,11 @@ out:
static int
glusterd_snapshot_get_snapvol_detail(dict_t *dict, glusterd_volinfo_t *snap_vol,
- char *keyprefix, int detail)
+ const char *keyprefix, const int detail)
{
int ret = -1;
int snap_limit = 0;
- char key[PATH_MAX] = "";
+ char key[64] = ""; /* keyprefix is quite small, up to 32 byts */
int keylen;
char *value = NULL;
glusterd_volinfo_t *origin_vol = NULL;
@@ -3323,13 +3248,14 @@ out:
static int
glusterd_snapshot_get_snap_detail(dict_t *dict, glusterd_snap_t *snap,
- char *keyprefix, glusterd_volinfo_t *volinfo)
+ const char *keyprefix,
+ glusterd_volinfo_t *volinfo)
{
int ret = -1;
int volcount = 0;
- char key[PATH_MAX] = "";
+ char key[32] = ""; /* keyprefix is quite small, up to 16 bytes */
int keylen;
- char timestr[64] = "";
+ char timestr[GF_TIMESTR_SIZE] = "";
char *value = NULL;
glusterd_volinfo_t *snap_vol = NULL;
glusterd_volinfo_t *tmp_vol = NULL;
@@ -3491,7 +3417,7 @@ glusterd_snapshot_get_all_snap_info(dict_t *dict)
{
int ret = -1;
int snapcount = 0;
- char key[64] = "";
+ char key[16] = "";
glusterd_snap_t *snap = NULL;
glusterd_snap_t *tmp_snap = NULL;
glusterd_conf_t *priv = NULL;
@@ -3538,7 +3464,7 @@ glusterd_snapshot_get_info_by_volume(dict_t *dict, char *volname, char *err_str,
int snapcount = 0;
int snap_limit = 0;
char *value = NULL;
- char key[64] = "";
+ char key[16] = "";
glusterd_volinfo_t *volinfo = NULL;
glusterd_volinfo_t *snap_vol = NULL;
glusterd_volinfo_t *tmp_vol = NULL;
@@ -3836,7 +3762,7 @@ glusterd_snapshot_get_vol_snapnames(dict_t *dict, glusterd_volinfo_t *volinfo)
int ret = -1;
int snapcount = 0;
char *snapname = NULL;
- char key[PATH_MAX] = "";
+ char key[32] = "";
glusterd_volinfo_t *snap_vol = NULL;
glusterd_volinfo_t *tmp_vol = NULL;
xlator_t *this = NULL;
@@ -4004,7 +3930,8 @@ glusterd_handle_snapshot_create(rpcsvc_request_t *req, glusterd_op_t op,
goto out;
}
- ret = dict_set_int64(dict, "snap-time", (int64_t)time(&snap_time));
+ snap_time = gf_time();
+ ret = dict_set_int64(dict, "snap-time", (int64_t)snap_time);
if (ret) {
gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
"Unable to set snap-time");
@@ -4569,6 +4496,7 @@ glusterd_add_missed_snaps_to_dict(dict_t *rsp_dict,
snap_uuid, snap_vol->volname, brick_number, brickinfo->path,
op, GD_MISSED_SNAP_PENDING);
if ((len < 0) || (len >= sizeof(missed_snap_entry))) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_COPY_FAIL, NULL);
goto out;
}
@@ -4576,6 +4504,8 @@ glusterd_add_missed_snaps_to_dict(dict_t *rsp_dict,
ret = dict_get_int32n(rsp_dict, "missed_snap_count",
SLEN("missed_snap_count"), &missed_snap_count);
if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Key=missed_snap_count", NULL);
/* Initialize the missed_snap_count for the first time */
missed_snap_count = 0;
}
@@ -4731,7 +4661,7 @@ glusterd_snap_brick_create(glusterd_volinfo_t *snap_volinfo,
goto out;
}
- ret = mkdir_p(snap_brick_mount_path, 0777, _gf_true);
+ ret = mkdir_p(snap_brick_mount_path, 0755, _gf_true);
if (ret) {
gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DIR_OP_FAILED,
"creating the brick directory"
@@ -4765,7 +4695,7 @@ glusterd_snap_brick_create(glusterd_volinfo_t *snap_volinfo,
ret = sys_lsetxattr(brickinfo->path, GF_XATTR_VOL_ID_KEY,
snap_volinfo->volume_id, 16, XATTR_REPLACE);
if (ret == -1) {
- gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_SETXATTR_FAIL,
+ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_SET_XATTR_FAIL,
"Failed to set "
"extended attribute %s on %s. Reason: "
"%s, snap: %s",
@@ -5393,6 +5323,48 @@ glusterd_do_snap_vol(glusterd_volinfo_t *origin_vol, glusterd_snap_t *snap,
dict_deln(snap_vol->dict, "features.barrier", SLEN("features.barrier"));
gd_update_volume_op_versions(snap_vol);
+ /* *
+ * Create the export file from the node where ganesha.enable "on"
+ * is executed
+ * */
+ if (glusterd_is_ganesha_cluster() &&
+ glusterd_check_ganesha_export(snap_vol)) {
+ if (is_origin_glusterd(dict)) {
+ ret = manage_export_config(clonename, "on", NULL);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0,
+ GD_MSG_EXPORT_FILE_CREATE_FAIL,
+ "Failed to create"
+ "export file for NFS-Ganesha\n");
+ goto out;
+ }
+ }
+
+ ret = dict_set_dynstr_with_alloc(snap_vol->dict,
+ "features.cache-invalidation", "on");
+ ret = gd_ganesha_send_dbus(clonename, "on");
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_EXPORT_FILE_CREATE_FAIL,
+ "Dynamic export addition/deletion failed."
+ " Please see log file for details. Clone name = %s",
+ clonename);
+ goto out;
+ }
+ }
+ if (!glusterd_is_ganesha_cluster() &&
+ glusterd_check_ganesha_export(snap_vol)) {
+ /* This happens when a snapshot was created when Ganesha was
+ * enabled globally. Then Ganesha disabled from the cluster.
+ * In such cases, we will have the volume level option set
+ * on dict, So we have to disable it as it doesn't make sense
+ * to keep the option.
+ */
+
+ ret = dict_set_dynstr(snap_vol->dict, "ganesha.enable", "off");
+ if (ret)
+ goto out;
+ }
+
ret = glusterd_store_volinfo(snap_vol, GLUSTERD_VOLINFO_VER_AC_INCREMENT);
if (ret) {
gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLINFO_SET_FAIL,
@@ -5464,8 +5436,31 @@ out:
for (i = 0; unsupported_opt[i].key; i++)
GF_FREE(unsupported_opt[i].value);
- if (snap_vol)
+ if (snap_vol) {
+ if (glusterd_is_ganesha_cluster() &&
+ glusterd_check_ganesha_export(snap_vol)) {
+ if (is_origin_glusterd(dict)) {
+ ret = manage_export_config(clonename, "on", NULL);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0,
+ GD_MSG_EXPORT_FILE_CREATE_FAIL,
+ "Failed to create"
+ "export file for NFS-Ganesha\n");
+ }
+ }
+
+ ret = gd_ganesha_send_dbus(clonename, "off");
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0,
+ GD_MSG_EXPORT_FILE_CREATE_FAIL,
+ "Dynamic export addition/deletion failed."
+ " Please see log file for details. Clone name = %s",
+ clonename);
+ }
+ }
+
glusterd_snap_volume_remove(rsp_dict, snap_vol, _gf_true, _gf_true);
+ }
snap_vol = NULL;
}
@@ -5517,6 +5512,8 @@ glusterd_snapshot_activate_deactivate_prevalidate(dict_t *dict,
"Snapshot (%s) does not "
"exist.",
snapname);
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAP_NOT_FOUND,
+ "Snapname=%s", snapname, NULL);
*op_errno = EG_NOSNAP;
ret = -1;
goto out;
@@ -5629,12 +5626,12 @@ out:
return ret;
}
-int32_t
+static int32_t
glusterd_handle_snapshot_delete_all(dict_t *dict)
{
int32_t ret = -1;
int32_t i = 0;
- char key[PATH_MAX] = "";
+ char key[32] = "";
glusterd_conf_t *priv = NULL;
glusterd_snap_t *snap = NULL;
glusterd_snap_t *tmp_snap = NULL;
@@ -7232,10 +7229,10 @@ out:
return ret;
}
-int
+static int
glusterd_get_brick_lvm_details(dict_t *rsp_dict,
glusterd_brickinfo_t *brickinfo, char *volname,
- char *device, char *key_prefix)
+ char *device, const char *key_prefix)
{
int ret = -1;
glusterd_conf_t *priv = NULL;
@@ -7247,7 +7244,7 @@ glusterd_get_brick_lvm_details(dict_t *rsp_dict,
char buf[PATH_MAX] = "";
char *ptr = NULL;
char *token = NULL;
- char key[PATH_MAX] = "";
+ char key[160] = ""; /* key_prefix is 128 bytes at most */
char *value = NULL;
GF_ASSERT(rsp_dict);
@@ -7322,11 +7319,15 @@ glusterd_get_brick_lvm_details(dict_t *rsp_dict,
if (token != NULL) {
value = gf_strdup(token);
if (!value) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_STRDUP_FAILED,
+ "token=%s", token, NULL);
ret = -1;
goto end;
}
ret = snprintf(key, sizeof(key), "%s.data", key_prefix);
if (ret < 0) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_COPY_FAIL,
+ NULL);
goto end;
}
@@ -7341,11 +7342,15 @@ glusterd_get_brick_lvm_details(dict_t *rsp_dict,
if (token != NULL) {
value = gf_strdup(token);
if (!value) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_STRDUP_FAILED,
+ "token=%s", token, NULL);
ret = -1;
goto end;
}
ret = snprintf(key, sizeof(key), "%s.lvsize", key_prefix);
if (ret < 0) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_COPY_FAIL,
+ NULL);
goto end;
}
@@ -7375,16 +7380,16 @@ out:
return ret;
}
-int
+static int
glusterd_get_single_brick_status(char **op_errstr, dict_t *rsp_dict,
- char *keyprefix, int index,
+ const char *keyprefix, int index,
glusterd_volinfo_t *snap_volinfo,
glusterd_brickinfo_t *brickinfo)
{
int ret = -1;
xlator_t *this = NULL;
glusterd_conf_t *priv = NULL;
- char key[PATH_MAX] = "";
+ char key[128] = ""; /* keyprefix is not longer than 64 bytes */
int keylen;
char *device = NULL;
char *value = NULL;
@@ -7405,6 +7410,7 @@ glusterd_get_single_brick_status(char **op_errstr, dict_t *rsp_dict,
keylen = snprintf(key, sizeof(key), "%s.brick%d.path", keyprefix, index);
if (keylen < 0) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_COPY_FAIL, NULL);
ret = -1;
goto out;
}
@@ -7412,11 +7418,14 @@ glusterd_get_single_brick_status(char **op_errstr, dict_t *rsp_dict,
ret = snprintf(brick_path, sizeof(brick_path), "%s:%s", brickinfo->hostname,
brickinfo->path);
if (ret < 0) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_COPY_FAIL, NULL);
goto out;
}
value = gf_strdup(brick_path);
if (!value) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_STRDUP_FAILED,
+ "brick_path=%s", brick_path, NULL);
ret = -1;
goto out;
}
@@ -7492,6 +7501,8 @@ glusterd_get_single_brick_status(char **op_errstr, dict_t *rsp_dict,
index);
if (keylen < 0) {
ret = -1;
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_COPY_FAIL,
+ NULL);
goto out;
}
@@ -7549,13 +7560,13 @@ out:
return ret;
}
-int
+static int
glusterd_get_single_snap_status(char **op_errstr, dict_t *rsp_dict,
- char *keyprefix, glusterd_snap_t *snap)
+ const char *keyprefix, glusterd_snap_t *snap)
{
int ret = -1;
xlator_t *this = NULL;
- char key[PATH_MAX] = "";
+ char key[64] = ""; /* keyprefix is "status.snap0" */
int keylen;
char brickkey[PATH_MAX] = "";
glusterd_volinfo_t *snap_volinfo = NULL;
@@ -7577,6 +7588,7 @@ glusterd_get_single_snap_status(char **op_errstr, dict_t *rsp_dict,
{
keylen = snprintf(key, sizeof(key), "%s.vol%d", keyprefix, volcount);
if (keylen < 0) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_COPY_FAIL, NULL);
ret = -1;
goto out;
}
@@ -7600,6 +7612,7 @@ glusterd_get_single_snap_status(char **op_errstr, dict_t *rsp_dict,
}
keylen = snprintf(brickkey, sizeof(brickkey), "%s.brickcount", key);
if (keylen < 0) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_COPY_FAIL, NULL);
goto out;
}
@@ -7614,6 +7627,7 @@ glusterd_get_single_snap_status(char **op_errstr, dict_t *rsp_dict,
keylen = snprintf(key, sizeof(key), "%s.volcount", keyprefix);
if (keylen < 0) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_COPY_FAIL, NULL);
ret = -1;
goto out;
}
@@ -7630,12 +7644,13 @@ out:
return ret;
}
-int
+static int
glusterd_get_each_snap_object_status(char **op_errstr, dict_t *rsp_dict,
- glusterd_snap_t *snap, char *keyprefix)
+ glusterd_snap_t *snap,
+ const char *keyprefix)
{
int ret = -1;
- char key[PATH_MAX] = "";
+ char key[32] = ""; /* keyprefix is "status.snap0" */
int keylen;
char *temp = NULL;
xlator_t *this = NULL;
@@ -7652,6 +7667,7 @@ glusterd_get_each_snap_object_status(char **op_errstr, dict_t *rsp_dict,
*/
keylen = snprintf(key, sizeof(key), "%s.snapname", keyprefix);
if (keylen < 0) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_COPY_FAIL, NULL);
ret = -1;
goto out;
}
@@ -7673,6 +7689,7 @@ glusterd_get_each_snap_object_status(char **op_errstr, dict_t *rsp_dict,
keylen = snprintf(key, sizeof(key), "%s.uuid", keyprefix);
if (keylen < 0) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_COPY_FAIL, NULL);
ret = -1;
goto out;
}
@@ -7756,6 +7773,7 @@ glusterd_get_snap_status_of_volume(char **op_errstr, dict_t *rsp_dict,
{
ret = snprintf(key, sizeof(key), "status.snap%d.snapname", i);
if (ret < 0) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_COPY_FAIL, NULL);
goto out;
}
@@ -7807,6 +7825,7 @@ glusterd_get_all_snapshot_status(dict_t *dict, char **op_errstr,
{
ret = snprintf(key, sizeof(key), "status.snap%d.snapname", i);
if (ret < 0) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_COPY_FAIL, NULL);
goto out;
}
@@ -7841,7 +7860,6 @@ glusterd_snapshot_status_commit(dict_t *dict, char **op_errstr,
xlator_t *this = NULL;
int ret = -1;
glusterd_conf_t *conf = NULL;
- char *get_buffer = NULL;
int32_t cmd = -1;
char *snapname = NULL;
glusterd_snap_t *snap = NULL;
@@ -7910,8 +7928,7 @@ glusterd_snapshot_status_commit(dict_t *dict, char **op_errstr,
if (ret) {
gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAP_STATUS_FAIL,
"Unable to "
- "get status of snap %s",
- get_buffer);
+ "get status of snap");
goto out;
}
@@ -8321,12 +8338,6 @@ glusterd_snapshot_create_postvalidate(dict_t *dict, int32_t op_ret,
}
}
- ret = glusterd_snapshot_resume_tier(this, dict);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAP_RESUME_TIER_FAIL,
- "Failed to resume tier in snapshot postvalidate.");
- }
-
out:
return ret;
}
@@ -8858,6 +8869,7 @@ glusterd_snapshot_revert_partial_restored_vol(glusterd_volinfo_t *volinfo)
"%s/" GLUSTERD_TRASH "/vols-%s.deleted", priv->workdir,
volinfo->volname);
if ((len < 0) || (len >= sizeof(trash_path))) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_COPY_FAIL, NULL);
ret = -1;
goto out;
}
@@ -8918,12 +8930,10 @@ glusterd_snapshot_revert_partial_restored_vol(glusterd_volinfo_t *volinfo)
snap_vol->volume_id,
sizeof(snap_vol->volume_id), XATTR_REPLACE);
if (ret == -1) {
- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SETXATTR_FAIL,
- "Failed to set extended "
- "attribute %s on %s. "
- "Reason: %s, snap: %s",
- GF_XATTR_VOL_ID_KEY, brickinfo->path,
- strerror(errno), snap_vol->volname);
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_SET_XATTR_FAIL,
+ "Attribute=%s, Path=%s, Reason=%s, Snap=%s",
+ GF_XATTR_VOL_ID_KEY, brickinfo->path,
+ strerror(errno), snap_vol->volname, NULL);
goto out;
}
}
@@ -9303,6 +9313,7 @@ glusterd_handle_snapshot_fn(rpcsvc_request_t *req)
ret = xdr_to_generic(req->msg[0], &cli_req, (xdrproc_t)xdr_gf_cli_req);
if (ret < 0) {
req->rpc_err = GARBAGE_ARGS;
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_GARBAGE_ARGS, NULL);
goto out;
}
@@ -9985,7 +9996,7 @@ glusterd_snapshot_get_volnames_uuids(dict_t *dict, char *volname,
{
int ret = -1;
int snapcount = 0;
- char key[PATH_MAX] = "";
+ char key[32] = "";
glusterd_volinfo_t *snap_vol = NULL;
glusterd_volinfo_t *volinfo = NULL;
glusterd_volinfo_t *tmp_vol = NULL;
diff --git a/xlators/mgmt/glusterd/src/glusterd-statedump.c b/xlators/mgmt/glusterd/src/glusterd-statedump.c
index 8c2786cb3f7..225d10cc546 100644
--- a/xlators/mgmt/glusterd/src/glusterd-statedump.c
+++ b/xlators/mgmt/glusterd/src/glusterd-statedump.c
@@ -8,11 +8,10 @@
cases as published by the Free Software Foundation.
*/
-#include "statedump.h"
+#include <glusterfs/statedump.h>
#include "glusterd.h"
#include "glusterd-shd-svc.h"
#include "glusterd-quotad-svc.h"
-#include "glusterd-nfs-svc.h"
#include "glusterd-locks.h"
#include "glusterd-messages.h"
@@ -201,13 +200,10 @@ glusterd_dump_priv(xlator_t *this)
gf_proc_dump_build_key(key, "glusterd", "ping-timeout");
gf_proc_dump_write(key, "%d", priv->ping_timeout);
-
- gf_proc_dump_build_key(key, "glusterd", "shd.online");
- gf_proc_dump_write(key, "%d", priv->shd_svc.online);
-
+#ifdef BUILD_GNFS
gf_proc_dump_build_key(key, "glusterd", "nfs.online");
gf_proc_dump_write(key, "%d", priv->nfs_svc.online);
-
+#endif
gf_proc_dump_build_key(key, "glusterd", "quotad.online");
gf_proc_dump_write(key, "%d", priv->quotad_svc.online);
diff --git a/xlators/mgmt/glusterd/src/glusterd-statedump.h b/xlators/mgmt/glusterd/src/glusterd-statedump.h
index 7d7fc1a7fa3..b5ef1f48e82 100644
--- a/xlators/mgmt/glusterd/src/glusterd-statedump.h
+++ b/xlators/mgmt/glusterd/src/glusterd-statedump.h
@@ -11,7 +11,7 @@
#ifndef _GLUSTERD_STATEDUMP_H_
#define _GLUSTERD_STATEDUMP_H_
-#include "xlator.h"
+#include <glusterfs/xlator.h>
int
glusterd_dump_priv(xlator_t *this);
diff --git a/xlators/mgmt/glusterd/src/glusterd-store.c b/xlators/mgmt/glusterd/src/glusterd-store.c
index 57ad7ca501d..d94dceb10b7 100644
--- a/xlators/mgmt/glusterd/src/glusterd-store.c
+++ b/xlators/mgmt/glusterd/src/glusterd-store.c
@@ -11,32 +11,32 @@
#include "glusterd-op-sm.h"
#include <inttypes.h>
-#include "glusterfs.h"
-#include "compat.h"
-#include "dict.h"
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/compat.h>
+#include <glusterfs/dict.h>
#include "protocol-common.h"
-#include "xlator.h"
-#include "logging.h"
-#include "timer.h"
-#include "syscall.h"
-#include "defaults.h"
-#include "compat.h"
-#include "compat-errno.h"
-#include "statedump.h"
+#include <glusterfs/xlator.h>
+#include <glusterfs/logging.h>
+#include <glusterfs/timer.h>
+#include <glusterfs/syscall.h>
+#include <glusterfs/defaults.h>
+#include <glusterfs/compat.h>
+#include <glusterfs/compat-errno.h>
+#include <glusterfs/statedump.h>
#include "glusterd-mem-types.h"
#include "glusterd.h"
#include "glusterd-sm.h"
#include "glusterd-op-sm.h"
#include "glusterd-utils.h"
#include "glusterd-hooks.h"
-#include "store.h"
+#include <glusterfs/store.h>
#include "glusterd-store.h"
#include "glusterd-snapshot-utils.h"
#include "glusterd-messages.h"
#include "rpc-clnt.h"
-#include "common-utils.h"
-#include "quota-common-utils.h"
+#include <glusterfs/common-utils.h>
+#include <glusterfs/quota-common-utils.h>
#include <sys/resource.h>
#include <inttypes.h>
@@ -48,6 +48,23 @@
#include "mntent_compat.h"
#endif
+#define GLUSTERD_GET_BRICK_DIR(path, volinfo, priv) \
+ do { \
+ int32_t _brick_len; \
+ if (volinfo->is_snap_volume) { \
+ _brick_len = snprintf(path, PATH_MAX, "%s/snaps/%s/%s/%s", \
+ priv->workdir, volinfo->snapshot->snapname, \
+ volinfo->volname, GLUSTERD_BRICK_INFO_DIR); \
+ } else { \
+ _brick_len = snprintf(path, PATH_MAX, "%s/%s/%s/%s", \
+ priv->workdir, GLUSTERD_VOLUME_DIR_PREFIX, \
+ volinfo->volname, GLUSTERD_BRICK_INFO_DIR); \
+ } \
+ if ((_brick_len < 0) || (_brick_len >= PATH_MAX)) { \
+ path[0] = 0; \
+ } \
+ } while (0)
+
void
glusterd_replace_slash_with_hyphen(char *str)
{
@@ -57,7 +74,7 @@ glusterd_replace_slash_with_hyphen(char *str)
while (ptr) {
*ptr = '-';
- ptr = strchr(str, '/');
+ ptr = strchr(ptr, '/');
}
}
@@ -239,9 +256,10 @@ out:
int32_t
glusterd_store_volinfo_brick_fname_write(int vol_fd,
glusterd_brickinfo_t *brickinfo,
- int32_t brick_count)
+ int32_t brick_count,
+ int is_thin_arbiter)
{
- char key[PATH_MAX] = {
+ char key[64] = {
0,
};
char brickfname[PATH_MAX] = {
@@ -249,8 +267,13 @@ glusterd_store_volinfo_brick_fname_write(int vol_fd,
};
int32_t ret = -1;
- snprintf(key, sizeof(key), "%s-%d", GLUSTERD_STORE_KEY_VOL_BRICK,
- brick_count);
+ if (!is_thin_arbiter) {
+ snprintf(key, sizeof(key), "%s-%d", GLUSTERD_STORE_KEY_VOL_BRICK,
+ brick_count);
+ } else {
+ snprintf(key, sizeof(key), "%s-%d", GLUSTERD_STORE_KEY_VOL_TA_BRICK,
+ brick_count);
+ }
glusterd_store_brickinfofname_set(brickinfo, brickfname,
sizeof(brickfname));
ret = gf_store_save_value(vol_fd, key, brickfname);
@@ -295,15 +318,14 @@ glusterd_store_create_snapd_shandle_on_absence(glusterd_volinfo_t *volinfo)
* The snapshot details will be stored only if the cluster op-version is
* greater than or equal to 4
*/
-int
+static int
gd_store_brick_snap_details_write(int fd, glusterd_brickinfo_t *brickinfo)
{
int ret = -1;
xlator_t *this = NULL;
glusterd_conf_t *conf = NULL;
- char value[PATH_MAX] = {
- 0,
- };
+ char value[5 * PATH_MAX];
+ uint total_len = 0;
this = THIS;
GF_ASSERT(this != NULL);
@@ -318,102 +340,104 @@ gd_store_brick_snap_details_write(int fd, glusterd_brickinfo_t *brickinfo)
goto out;
}
- if (strlen(brickinfo->device_path) > 0) {
- snprintf(value, sizeof(value), "%s", brickinfo->device_path);
- ret = gf_store_save_value(fd, GLUSTERD_STORE_KEY_BRICK_DEVICE_PATH,
- value);
- if (ret)
- goto out;
+ if (brickinfo->device_path[0] != '\0') {
+ ret = snprintf(value + total_len, sizeof(value) - total_len, "%s=%s\n",
+ GLUSTERD_STORE_KEY_BRICK_DEVICE_PATH,
+ brickinfo->device_path);
+ if (ret < 0 || ret >= sizeof(value) - total_len) {
+ ret = -1;
+ goto err;
+ }
+ total_len += ret;
}
- if (strlen(brickinfo->mount_dir) > 0) {
- snprintf(value, sizeof(value), "%s", brickinfo->mount_dir);
- ret = gf_store_save_value(fd, GLUSTERD_STORE_KEY_BRICK_MOUNT_DIR,
- value);
- if (ret)
- goto out;
+ if (brickinfo->mount_dir[0] != '\0') {
+ ret = snprintf(value + total_len, sizeof(value) - total_len, "%s=%s\n",
+ GLUSTERD_STORE_KEY_BRICK_MOUNT_DIR,
+ brickinfo->mount_dir);
+ if (ret < 0 || ret >= sizeof(value) - total_len) {
+ ret = -1;
+ goto err;
+ }
+ total_len += ret;
}
- if (strlen(brickinfo->fstype) > 0) {
- snprintf(value, sizeof(value), "%s", brickinfo->fstype);
- ret = gf_store_save_value(fd, GLUSTERD_STORE_KEY_BRICK_FSTYPE, value);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_FS_LABEL_UPDATE_FAIL,
- "Failed to save "
- "brick fs type of brick %s",
- brickinfo->path);
- goto out;
+ if (brickinfo->fstype[0] != '\0') {
+ ret = snprintf(value + total_len, sizeof(value) - total_len, "%s=%s\n",
+ GLUSTERD_STORE_KEY_BRICK_FSTYPE, brickinfo->fstype);
+ if (ret < 0 || ret >= sizeof(value) - total_len) {
+ ret = -1;
+ goto err;
}
+ total_len += ret;
}
- if (strlen(brickinfo->mnt_opts) > 0) {
- snprintf(value, sizeof(value), "%s", brickinfo->mnt_opts);
- ret = gf_store_save_value(fd, GLUSTERD_STORE_KEY_BRICK_MNTOPTS, value);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BRK_MOUNTOPTS_FAIL,
- "Failed to save "
- "brick mnt opts of brick %s",
- brickinfo->path);
- goto out;
+ if (brickinfo->mnt_opts[0] != '\0') {
+ ret = snprintf(value + total_len, sizeof(value) - total_len, "%s=%s\n",
+ GLUSTERD_STORE_KEY_BRICK_MNTOPTS, brickinfo->mnt_opts);
+ if (ret < 0 || ret >= sizeof(value) - total_len) {
+ ret = -1;
+ goto err;
}
+ total_len += ret;
}
- snprintf(value, sizeof(value), "%d", brickinfo->snap_status);
- ret = gf_store_save_value(fd, GLUSTERD_STORE_KEY_BRICK_SNAP_STATUS, value);
- if (ret)
- goto out;
+ ret = snprintf(value + total_len, sizeof(value) - total_len, "%s=%d\n",
+ GLUSTERD_STORE_KEY_BRICK_SNAP_STATUS,
+ brickinfo->snap_status);
+ if (ret < 0 || ret >= sizeof(value) - total_len) {
+ ret = -1;
+ goto err;
+ }
+ total_len += ret;
- snprintf(value, sizeof(value), "%" PRIu64, brickinfo->statfs_fsid);
- ret = gf_store_save_value(fd, GLUSTERD_STORE_KEY_BRICK_FSID, value);
+ ret = snprintf(value + total_len, sizeof(value) - total_len,
+ "%s=%" PRIu64 "\n", GLUSTERD_STORE_KEY_BRICK_FSID,
+ brickinfo->statfs_fsid);
+ if (ret < 0 || ret >= sizeof(value) - total_len) {
+ ret = -1;
+ goto err;
+ }
+ ret = gf_store_save_items(fd, value);
+err:
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_FS_LABEL_UPDATE_FAIL,
+ "Failed to save "
+ "snap detils of brick %s",
+ brickinfo->path);
+ }
out:
return ret;
}
-int32_t
+static int32_t
glusterd_store_brickinfo_write(int fd, glusterd_brickinfo_t *brickinfo)
{
- char value[256] = {
- 0,
- };
- int32_t ret = 0;
+ char value[5 * PATH_MAX];
+ int32_t ret = -1;
GF_ASSERT(brickinfo);
GF_ASSERT(fd > 0);
- ret = gf_store_save_value(fd, GLUSTERD_STORE_KEY_BRICK_UUID,
- uuid_utoa(brickinfo->uuid));
- if (ret)
- goto out;
- ret = gf_store_save_value(fd, GLUSTERD_STORE_KEY_BRICK_HOSTNAME,
- brickinfo->hostname);
- if (ret)
- goto out;
-
- ret = gf_store_save_value(fd, GLUSTERD_STORE_KEY_BRICK_PATH,
- brickinfo->path);
- if (ret)
- goto out;
-
- ret = gf_store_save_value(fd, GLUSTERD_STORE_KEY_BRICK_REAL_PATH,
- brickinfo->path);
- if (ret)
- goto out;
-
- snprintf(value, sizeof(value), "%d", brickinfo->port);
- ret = gf_store_save_value(fd, GLUSTERD_STORE_KEY_BRICK_PORT, value);
-
- snprintf(value, sizeof(value), "%d", brickinfo->rdma_port);
- ret = gf_store_save_value(fd, GLUSTERD_STORE_KEY_BRICK_RDMA_PORT, value);
-
- snprintf(value, sizeof(value), "%d", brickinfo->decommissioned);
- ret = gf_store_save_value(fd, GLUSTERD_STORE_KEY_BRICK_DECOMMISSIONED,
- value);
- if (ret)
+ ret = snprintf(value, sizeof(value),
+ "%s=%s\n%s=%s\n%s=%s\n%s=%s\n%s=%d\n%s=%d\n%s=%d\n%s=%s\n",
+ GLUSTERD_STORE_KEY_BRICK_UUID, uuid_utoa(brickinfo->uuid),
+ GLUSTERD_STORE_KEY_BRICK_HOSTNAME, brickinfo->hostname,
+ GLUSTERD_STORE_KEY_BRICK_PATH, brickinfo->path,
+ GLUSTERD_STORE_KEY_BRICK_REAL_PATH, brickinfo->path,
+ GLUSTERD_STORE_KEY_BRICK_PORT, brickinfo->port,
+ GLUSTERD_STORE_KEY_BRICK_RDMA_PORT, brickinfo->rdma_port,
+ GLUSTERD_STORE_KEY_BRICK_DECOMMISSIONED,
+ brickinfo->decommissioned, GLUSTERD_STORE_KEY_BRICK_ID,
+ brickinfo->brick_id);
+
+ if (ret < 0 || ret >= sizeof(value)) {
+ ret = -1;
goto out;
+ }
- ret = gf_store_save_value(fd, GLUSTERD_STORE_KEY_BRICK_ID,
- brickinfo->brick_id);
+ ret = gf_store_save_items(fd, value);
if (ret)
goto out;
@@ -434,7 +458,7 @@ out:
int32_t
glusterd_store_snapd_write(int fd, glusterd_volinfo_t *volinfo)
{
- char value[256] = {
+ char value[64] = {
0,
};
int32_t ret = 0;
@@ -458,7 +482,7 @@ glusterd_store_snapd_write(int fd, glusterd_volinfo_t *volinfo)
return ret;
}
-int32_t
+static int32_t
glusterd_store_perform_brick_store(glusterd_brickinfo_t *brickinfo)
{
int fd = -1;
@@ -470,14 +494,14 @@ glusterd_store_perform_brick_store(glusterd_brickinfo_t *brickinfo)
ret = -1;
goto out;
}
-
ret = glusterd_store_brickinfo_write(fd, brickinfo);
if (ret)
goto out;
out:
- if (ret && (fd > 0))
+ if (ret && (fd > 0)) {
gf_store_unlink_tmppath(brickinfo->shandle);
+ }
gf_msg_debug(THIS->name, 0, "Returning %d", ret);
return ret;
}
@@ -522,22 +546,18 @@ out:
return ret;
}
-int32_t
+static int32_t
glusterd_store_brickinfo(glusterd_volinfo_t *volinfo,
glusterd_brickinfo_t *brickinfo, int32_t brick_count,
- int vol_fd)
+ int vol_fd, int is_thin_arbiter)
{
int32_t ret = -1;
GF_ASSERT(volinfo);
GF_ASSERT(brickinfo);
- ret = glusterd_store_volinfo_brick_fname_write(vol_fd, brickinfo,
- brick_count);
- if (ret)
- goto out;
-
- ret = glusterd_store_create_brick_dir(volinfo);
+ ret = glusterd_store_volinfo_brick_fname_write(
+ vol_fd, brickinfo, brick_count, is_thin_arbiter);
if (ret)
goto out;
@@ -639,154 +659,73 @@ out:
return ret;
}
-int32_t
-glusterd_store_remove_bricks(glusterd_volinfo_t *volinfo, char *delete_path)
-{
- int32_t ret = 0;
- glusterd_brickinfo_t *tmp = NULL;
- glusterd_conf_t *priv = NULL;
- xlator_t *this = NULL;
- DIR *dir = NULL;
- struct dirent *entry = NULL;
- struct dirent scratch[2] = {
- {
- 0,
- },
- };
- char path[PATH_MAX] = {
- 0,
- };
- char brickdir[PATH_MAX] = {
- 0,
- };
- int32_t len = 0;
-
- this = THIS;
- GF_ASSERT(this);
-
- GF_ASSERT(volinfo);
-
- cds_list_for_each_entry(tmp, &volinfo->bricks, brick_list)
- {
- ret = glusterd_store_delete_brick(tmp, delete_path);
- if (ret)
- goto out;
- }
-
- priv = this->private;
- GF_ASSERT(priv);
-
- len = snprintf(brickdir, sizeof(brickdir), "%s/%s", delete_path,
- GLUSTERD_BRICK_INFO_DIR);
- if ((len < 0) || (len >= sizeof(brickdir))) {
- ret = -1;
- goto out;
- }
-
- dir = sys_opendir(brickdir);
-
- GF_SKIP_IRRELEVANT_ENTRIES(entry, dir, scratch);
-
- while (entry) {
- len = snprintf(path, sizeof(path), "%s/%s", brickdir, entry->d_name);
- if ((len >= 0) && (len < sizeof(path))) {
- ret = sys_unlink(path);
- if (ret && errno != ENOENT) {
- gf_msg_debug(this->name, 0, "Unable to unlink %s", path);
- }
- }
- GF_SKIP_IRRELEVANT_ENTRIES(entry, dir, scratch);
- }
-
- sys_closedir(dir);
-
- ret = sys_rmdir(brickdir);
-
-out:
- gf_msg_debug(this->name, 0, "Returning with %d", ret);
- return ret;
-}
-
static int
-_storeslaves(dict_t *this, char *key, data_t *value, void *data)
-{
- int32_t ret = 0;
- gf_store_handle_t *shandle = NULL;
- xlator_t *xl = NULL;
-
- xl = THIS;
- GF_ASSERT(xl);
-
- shandle = (gf_store_handle_t *)data;
-
- GF_ASSERT(shandle);
- GF_ASSERT(shandle->fd > 0);
- GF_ASSERT(shandle->path);
- GF_ASSERT(key);
- GF_ASSERT(value);
- GF_ASSERT(value->data);
-
- gf_msg_debug(xl->name, 0, "Storing in volinfo:key= %s, val=%s", key,
- value->data);
-
- ret = gf_store_save_value(shandle->fd, key, (char *)value->data);
- if (ret) {
- gf_msg(xl->name, GF_LOG_ERROR, 0, GD_MSG_STORE_HANDLE_WRITE_FAIL,
- "Unable to write into store"
- " handle for path: %s",
- shandle->path);
- return -1;
- }
- return 0;
-}
-
-int
-_storeopts(dict_t *this, char *key, data_t *value, void *data)
+_storeopts(dict_t *dict_value, char *key, data_t *value, void *data)
{
int32_t ret = 0;
int32_t exists = 0;
+ int32_t option_len = 0;
gf_store_handle_t *shandle = NULL;
- xlator_t *xl = NULL;
+ glusterd_volinfo_data_store_t *dict_data = NULL;
+ xlator_t *this = NULL;
- xl = THIS;
- GF_ASSERT(xl);
+ this = THIS;
+ GF_ASSERT(this);
- shandle = (gf_store_handle_t *)data;
+ dict_data = (glusterd_volinfo_data_store_t *)data;
+ shandle = dict_data->shandle;
GF_ASSERT(shandle);
GF_ASSERT(shandle->fd > 0);
- GF_ASSERT(shandle->path);
GF_ASSERT(key);
GF_ASSERT(value);
GF_ASSERT(value->data);
- if (is_key_glusterd_hooks_friendly(key)) {
- exists = 1;
+ if (dict_data->key_check == 1) {
+ if (is_key_glusterd_hooks_friendly(key)) {
+ exists = 1;
- } else {
- exists = glusterd_check_option_exists(key, NULL);
+ } else {
+ exists = glusterd_check_option_exists(key, NULL);
+ }
}
-
- if (1 == exists) {
- gf_msg_debug(xl->name, 0,
- "Storing in volinfo:key= %s, "
+ if (exists == 1 || dict_data->key_check == 0) {
+ gf_msg_debug(this->name, 0,
+ "Storing in buffer for volinfo:key= %s, "
"val=%s",
key, value->data);
-
} else {
- gf_msg_debug(xl->name, 0, "Discarding:key= %s, val=%s", key,
+ gf_msg_debug(this->name, 0, "Discarding:key= %s, val=%s", key,
value->data);
return 0;
}
- ret = gf_store_save_value(shandle->fd, key, (char *)value->data);
- if (ret) {
- gf_msg(xl->name, GF_LOG_ERROR, 0, GD_MSG_STORE_HANDLE_WRITE_FAIL,
- "Unable to write into store"
- " handle for path: %s",
- shandle->path);
+ /*
+ * The option_len considers the length of the key value
+ * pair and along with that '=' and '\n', but as value->len
+ * already considers a NULL at the end of the data, adding
+ * just 1.
+ */
+ option_len = strlen(key) + value->len + 1;
+
+ if ((VOLINFO_BUFFER_SIZE - dict_data->buffer_len - 1) < option_len) {
+ ret = gf_store_save_items(shandle->fd, dict_data->buffer);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_FILE_OP_FAILED, NULL);
+ return -1;
+ }
+ dict_data->buffer_len = 0;
+ dict_data->buffer[0] = '\0';
+ }
+ ret = snprintf(dict_data->buffer + dict_data->buffer_len, option_len + 1,
+ "%s=%s\n", key, value->data);
+ if (ret < 0 || ret > option_len + 1) {
+ gf_smsg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_COPY_FAIL, NULL);
return -1;
}
+
+ dict_data->buffer_len += ret;
+
return 0;
}
@@ -795,7 +734,7 @@ _storeopts(dict_t *this, char *key, data_t *value, void *data)
* The snapshot details will be stored only if the cluster op-version is
* greater than or equal to 4
*/
-int
+static int
glusterd_volume_write_snap_details(int fd, glusterd_volinfo_t *volinfo)
{
int ret = -1;
@@ -818,229 +757,163 @@ glusterd_volume_write_snap_details(int fd, glusterd_volinfo_t *volinfo)
goto out;
}
- snprintf(buf, sizeof(buf), "%s", volinfo->parent_volname);
- ret = gf_store_save_value(fd, GLUSTERD_STORE_KEY_PARENT_VOLNAME, buf);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_STORE_FAIL,
- "Failed to store " GLUSTERD_STORE_KEY_PARENT_VOLNAME);
- goto out;
- }
-
- ret = gf_store_save_value(fd, GLUSTERD_STORE_KEY_VOL_RESTORED_SNAP,
- uuid_utoa(volinfo->restored_from_snap));
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_STORE_HANDLE_WRITE_FAIL,
- "Unable to write restored_from_snap");
- goto out;
+ ret = snprintf(buf, sizeof(buf), "%s=%s\n%s=%s\n%s=%" PRIu64 "\n",
+ GLUSTERD_STORE_KEY_PARENT_VOLNAME, volinfo->parent_volname,
+ GLUSTERD_STORE_KEY_VOL_RESTORED_SNAP,
+ uuid_utoa(volinfo->restored_from_snap),
+ GLUSTERD_STORE_KEY_SNAP_MAX_HARD_LIMIT,
+ volinfo->snap_max_hard_limit);
+ if (ret < 0 || ret >= sizeof(buf)) {
+ ret = -1;
+ goto err;
}
- snprintf(buf, sizeof(buf), "%" PRIu64, volinfo->snap_max_hard_limit);
- ret = gf_store_save_value(fd, GLUSTERD_STORE_KEY_SNAP_MAX_HARD_LIMIT, buf);
+ ret = gf_store_save_items(fd, buf);
if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_HARD_LIMIT_SET_FAIL,
- "Unable to write snap-max-hard-limit");
- goto out;
+ goto err;
}
-
ret = glusterd_store_snapd_info(volinfo);
- if (ret)
- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAPD_INFO_STORE_FAIL,
- "snapd info store failed "
- "volume: %s",
- volinfo->volname);
-
-out:
+err:
if (ret)
gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAPINFO_WRITE_FAIL,
"Failed to write snap details"
" for volume %s",
volinfo->volname);
- return ret;
-}
-
-int32_t
-glusterd_volume_write_tier_details(int fd, glusterd_volinfo_t *volinfo)
-{
- int32_t ret = -1;
- char buf[PATH_MAX] = "";
-
- if (volinfo->type != GF_CLUSTER_TYPE_TIER) {
- ret = 0;
- goto out;
- }
-
- snprintf(buf, sizeof(buf), "%d", volinfo->tier_info.cold_brick_count);
- ret = gf_store_save_value(fd, GLUSTERD_STORE_KEY_COLD_COUNT, buf);
- if (ret)
- goto out;
-
- snprintf(buf, sizeof(buf), "%d", volinfo->tier_info.cold_replica_count);
- ret = gf_store_save_value(fd, GLUSTERD_STORE_KEY_COLD_REPLICA_COUNT, buf);
- if (ret)
- goto out;
-
- snprintf(buf, sizeof(buf), "%d", volinfo->tier_info.cold_disperse_count);
- ret = gf_store_save_value(fd, GLUSTERD_STORE_KEY_COLD_DISPERSE_COUNT, buf);
- if (ret)
- goto out;
-
- snprintf(buf, sizeof(buf), "%d", volinfo->tier_info.cold_redundancy_count);
- ret = gf_store_save_value(fd, GLUSTERD_STORE_KEY_COLD_REDUNDANCY_COUNT,
- buf);
- if (ret)
- goto out;
-
- snprintf(buf, sizeof(buf), "%d", volinfo->tier_info.hot_brick_count);
- ret = gf_store_save_value(fd, GLUSTERD_STORE_KEY_HOT_COUNT, buf);
- if (ret)
- goto out;
-
- snprintf(buf, sizeof(buf), "%d", volinfo->tier_info.hot_replica_count);
- ret = gf_store_save_value(fd, GLUSTERD_STORE_KEY_HOT_REPLICA_COUNT, buf);
- if (ret)
- goto out;
-
- snprintf(buf, sizeof(buf), "%d", volinfo->tier_info.hot_type);
- ret = gf_store_save_value(fd, GLUSTERD_STORE_KEY_HOT_TYPE, buf);
- if (ret)
- goto out;
-
- snprintf(buf, sizeof(buf), "%d", volinfo->tier_info.cold_type);
- ret = gf_store_save_value(fd, GLUSTERD_STORE_KEY_COLD_TYPE, buf);
- if (ret)
- goto out;
-
out:
return ret;
}
-int32_t
+static int32_t
glusterd_volume_exclude_options_write(int fd, glusterd_volinfo_t *volinfo)
{
char *str = NULL;
- char buf[PATH_MAX] = "";
+ char buf[PATH_MAX];
+ uint total_len = 0;
int32_t ret = -1;
- xlator_t *this = NULL;
+ xlator_t *this = THIS;
glusterd_conf_t *conf = NULL;
- this = THIS;
GF_ASSERT(this);
GF_ASSERT(fd > 0);
GF_ASSERT(volinfo);
conf = this->private;
GF_VALIDATE_OR_GOTO(this->name, (conf != NULL), out);
- snprintf(buf, sizeof(buf), "%d", volinfo->type);
- ret = gf_store_save_value(fd, GLUSTERD_STORE_KEY_VOL_TYPE, buf);
- if (ret)
- goto out;
-
- snprintf(buf, sizeof(buf), "%d", volinfo->brick_count);
- ret = gf_store_save_value(fd, GLUSTERD_STORE_KEY_VOL_COUNT, buf);
- if (ret)
- goto out;
-
- snprintf(buf, sizeof(buf), "%d", volinfo->status);
- ret = gf_store_save_value(fd, GLUSTERD_STORE_KEY_VOL_STATUS, buf);
- if (ret)
- goto out;
-
- snprintf(buf, sizeof(buf), "%d", volinfo->sub_count);
- ret = gf_store_save_value(fd, GLUSTERD_STORE_KEY_VOL_SUB_COUNT, buf);
- if (ret)
- goto out;
-
- snprintf(buf, sizeof(buf), "%d", volinfo->stripe_count);
- ret = gf_store_save_value(fd, GLUSTERD_STORE_KEY_VOL_STRIPE_CNT, buf);
- if (ret)
- goto out;
-
- snprintf(buf, sizeof(buf), "%d", volinfo->replica_count);
- ret = gf_store_save_value(fd, GLUSTERD_STORE_KEY_VOL_REPLICA_CNT, buf);
- if (ret)
+ ret = snprintf(buf + total_len, sizeof(buf) - total_len,
+ "%s=%d\n%s=%d\n%s=%d\n%s=%d\n%s=%d\n%s=%d\n",
+ GLUSTERD_STORE_KEY_VOL_TYPE, volinfo->type,
+ GLUSTERD_STORE_KEY_VOL_COUNT, volinfo->brick_count,
+ GLUSTERD_STORE_KEY_VOL_STATUS, volinfo->status,
+ GLUSTERD_STORE_KEY_VOL_SUB_COUNT, volinfo->sub_count,
+ GLUSTERD_STORE_KEY_VOL_STRIPE_CNT, volinfo->stripe_count,
+ GLUSTERD_STORE_KEY_VOL_REPLICA_CNT, volinfo->replica_count);
+ if (ret < 0 || ret >= sizeof(buf) - total_len) {
+ ret = -1;
goto out;
+ }
+ total_len += ret;
if ((conf->op_version >= GD_OP_VERSION_3_7_6) && volinfo->arbiter_count) {
- snprintf(buf, sizeof(buf), "%d", volinfo->arbiter_count);
- ret = gf_store_save_value(fd, GLUSTERD_STORE_KEY_VOL_ARBITER_CNT, buf);
- if (ret)
+ ret = snprintf(buf + total_len, sizeof(buf) - total_len, "%s=%d\n",
+ GLUSTERD_STORE_KEY_VOL_ARBITER_CNT,
+ volinfo->arbiter_count);
+ if (ret < 0 || ret >= sizeof(buf) - total_len) {
+ ret = -1;
goto out;
+ }
+ total_len += ret;
}
if (conf->op_version >= GD_OP_VERSION_3_6_0) {
- snprintf(buf, sizeof(buf), "%d", volinfo->disperse_count);
- ret = gf_store_save_value(fd, GLUSTERD_STORE_KEY_VOL_DISPERSE_CNT, buf);
- if (ret)
- goto out;
-
- snprintf(buf, sizeof(buf), "%d", volinfo->redundancy_count);
- ret = gf_store_save_value(fd, GLUSTERD_STORE_KEY_VOL_REDUNDANCY_CNT,
- buf);
- if (ret)
+ ret = snprintf(
+ buf + total_len, sizeof(buf) - total_len, "%s=%d\n%s=%d\n",
+ GLUSTERD_STORE_KEY_VOL_DISPERSE_CNT, volinfo->disperse_count,
+ GLUSTERD_STORE_KEY_VOL_REDUNDANCY_CNT, volinfo->redundancy_count);
+ if (ret < 0 || ret >= sizeof(buf) - total_len) {
+ ret = -1;
goto out;
+ }
+ total_len += ret;
}
- snprintf(buf, sizeof(buf), "%d", volinfo->version);
- ret = gf_store_save_value(fd, GLUSTERD_STORE_KEY_VOL_VERSION, buf);
- if (ret)
- goto out;
-
- snprintf(buf, sizeof(buf), "%d", volinfo->transport_type);
- ret = gf_store_save_value(fd, GLUSTERD_STORE_KEY_VOL_TRANSPORT, buf);
- if (ret)
- goto out;
-
- ret = gf_store_save_value(fd, GLUSTERD_STORE_KEY_VOL_ID,
- uuid_utoa(volinfo->volume_id));
- if (ret)
+ ret = snprintf(buf + total_len, sizeof(buf) - total_len,
+ "%s=%d\n%s=%d\n%s=%s\n", GLUSTERD_STORE_KEY_VOL_VERSION,
+ volinfo->version, GLUSTERD_STORE_KEY_VOL_TRANSPORT,
+ volinfo->transport_type, GLUSTERD_STORE_KEY_VOL_ID,
+ uuid_utoa(volinfo->volume_id));
+ if (ret < 0 || ret >= sizeof(buf) - total_len) {
+ ret = -1;
goto out;
+ }
+ total_len += ret;
str = glusterd_auth_get_username(volinfo);
if (str) {
- ret = gf_store_save_value(fd, GLUSTERD_STORE_KEY_USERNAME, str);
- if (ret)
+ ret = snprintf(buf + total_len, sizeof(buf) - total_len, "%s=%s\n",
+ GLUSTERD_STORE_KEY_USERNAME, str);
+ if (ret < 0 || ret >= sizeof(buf) - total_len) {
+ ret = -1;
goto out;
+ }
+ total_len += ret;
}
str = glusterd_auth_get_password(volinfo);
if (str) {
- ret = gf_store_save_value(fd, GLUSTERD_STORE_KEY_PASSWORD, str);
- if (ret)
+ ret = snprintf(buf + total_len, sizeof(buf) - total_len, "%s=%s\n",
+ GLUSTERD_STORE_KEY_PASSWORD, str);
+ if (ret < 0 || ret >= sizeof(buf) - total_len) {
+ ret = -1;
goto out;
+ }
+ total_len += ret;
}
- snprintf(buf, sizeof(buf), "%d", volinfo->op_version);
- ret = gf_store_save_value(fd, GLUSTERD_STORE_KEY_VOL_OP_VERSION, buf);
- if (ret)
- goto out;
-
- snprintf(buf, sizeof(buf), "%d", volinfo->client_op_version);
- ret = gf_store_save_value(fd, GLUSTERD_STORE_KEY_VOL_CLIENT_OP_VERSION,
- buf);
- if (ret)
+ ret = snprintf(buf + total_len, sizeof(buf) - total_len, "%s=%d\n%s=%d\n",
+ GLUSTERD_STORE_KEY_VOL_OP_VERSION, volinfo->op_version,
+ GLUSTERD_STORE_KEY_VOL_CLIENT_OP_VERSION,
+ volinfo->client_op_version);
+ if (ret < 0 || ret >= sizeof(buf) - total_len) {
+ ret = -1;
goto out;
- if (volinfo->caps) {
- snprintf(buf, sizeof(buf), "%d", volinfo->caps);
- ret = gf_store_save_value(fd, GLUSTERD_STORE_KEY_VOL_CAPS, buf);
- if (ret)
- goto out;
}
+ total_len += ret;
if (conf->op_version >= GD_OP_VERSION_3_7_6) {
- snprintf(buf, sizeof(buf), "%d", volinfo->quota_xattr_version);
- ret = gf_store_save_value(fd, GLUSTERD_STORE_KEY_VOL_QUOTA_VERSION,
- buf);
- if (ret)
+ ret = snprintf(buf + total_len, sizeof(buf) - total_len, "%s=%d\n",
+ GLUSTERD_STORE_KEY_VOL_QUOTA_VERSION,
+ volinfo->quota_xattr_version);
+ if (ret < 0 || ret >= sizeof(buf) - total_len) {
+ ret = -1;
goto out;
+ }
+ total_len += ret;
}
if (conf->op_version >= GD_OP_VERSION_3_10_0) {
- snprintf(buf, sizeof(buf), "%d", volinfo->is_tier_enabled);
- ret = gf_store_save_value(fd, GF_TIER_ENABLED, buf);
- if (ret)
+ ret = snprintf(buf + total_len, sizeof(buf) - total_len, "%s=0\n",
+ GF_TIER_ENABLED);
+ if (ret < 0 || ret >= sizeof(buf) - total_len) {
+ ret = -1;
+ goto out;
+ }
+ total_len += ret;
+ }
+
+ if ((conf->op_version >= GD_OP_VERSION_7_0) &&
+ volinfo->thin_arbiter_count) {
+ ret = snprintf(buf + total_len, sizeof(buf) - total_len, "%s=%d\n",
+ GLUSTERD_STORE_KEY_VOL_THIN_ARBITER_CNT,
+ volinfo->thin_arbiter_count);
+ if (ret < 0 || ret >= sizeof(buf) - total_len) {
+ ret = -1;
goto out;
+ }
+ total_len += ret;
}
- ret = glusterd_volume_write_tier_details(fd, volinfo);
+ ret = gf_store_save_items(fd, buf);
+ if (ret)
+ goto out;
ret = glusterd_volume_write_snap_details(fd, volinfo);
@@ -1078,36 +951,26 @@ glusterd_store_piddirpath_set(glusterd_volinfo_t *volinfo, char *piddirpath)
}
static int32_t
-glusterd_store_create_volume_dir(glusterd_volinfo_t *volinfo)
+glusterd_store_create_volume_dirs(glusterd_volinfo_t *volinfo)
{
int32_t ret = -1;
- char voldirpath[PATH_MAX] = {
+ char dirpath[PATH_MAX] = {
0,
};
GF_ASSERT(volinfo);
- glusterd_store_voldirpath_set(volinfo, voldirpath);
- ret = gf_store_mkdir(voldirpath);
-
- gf_msg_debug(THIS->name, 0, "Returning with %d", ret);
- return ret;
-}
-
-static int32_t
-glusterd_store_create_volume_run_dir(glusterd_volinfo_t *volinfo)
-{
- int32_t ret = -1;
- char piddirpath[PATH_MAX] = {
- 0,
- };
-
- GF_ASSERT(volinfo);
-
- glusterd_store_piddirpath_set(volinfo, piddirpath);
+ glusterd_store_voldirpath_set(volinfo, dirpath);
+ ret = gf_store_mkdir(dirpath);
+ if (ret)
+ goto out;
- ret = gf_store_mkdir(piddirpath);
+ glusterd_store_piddirpath_set(volinfo, dirpath);
+ ret = gf_store_mkdir(dirpath);
+ if (ret)
+ goto out;
+out:
gf_msg_debug(THIS->name, 0, "Returning with %d", ret);
return ret;
}
@@ -1137,7 +1000,7 @@ glusterd_store_create_snap_dir(glusterd_snap_t *snap)
return ret;
}
-int32_t
+static int32_t
glusterd_store_volinfo_write(int fd, glusterd_volinfo_t *volinfo)
{
int32_t ret = -1;
@@ -1145,28 +1008,57 @@ glusterd_store_volinfo_write(int fd, glusterd_volinfo_t *volinfo)
GF_ASSERT(fd > 0);
GF_ASSERT(volinfo);
GF_ASSERT(volinfo->shandle);
+ xlator_t *this = NULL;
+ glusterd_volinfo_data_store_t *dict_data = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
shandle = volinfo->shandle;
+
+ dict_data = GF_CALLOC(1, sizeof(glusterd_volinfo_data_store_t),
+ gf_gld_mt_volinfo_dict_data_t);
+ if (dict_data == NULL) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_NO_MEMORY, NULL);
+ return -1;
+ }
+
ret = glusterd_volume_exclude_options_write(fd, volinfo);
- if (ret)
+ if (ret) {
goto out;
+ }
+
+ dict_data->shandle = shandle;
+ dict_data->key_check = 1;
shandle->fd = fd;
- dict_foreach(volinfo->dict, _storeopts, shandle);
+ dict_foreach(volinfo->dict, _storeopts, (void *)dict_data);
+
+ dict_data->key_check = 0;
+ dict_foreach(volinfo->gsync_slaves, _storeopts, (void *)dict_data);
+
+ if (dict_data->buffer_len > 0) {
+ ret = gf_store_save_items(fd, dict_data->buffer);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_FILE_OP_FAILED, NULL);
+ goto out;
+ }
+ }
- dict_foreach(volinfo->gsync_slaves, _storeslaves, shandle);
shandle->fd = 0;
out:
- gf_msg_debug(THIS->name, 0, "Returning %d", ret);
+ GF_FREE(dict_data);
+ gf_msg_debug(this->name, 0, "Returning %d", ret);
return ret;
}
-int32_t
+static int32_t
glusterd_store_snapinfo_write(glusterd_snap_t *snap)
{
int32_t ret = -1;
int fd = 0;
- char buf[PATH_MAX] = "";
+ char buf[PATH_MAX];
+ uint total_len = 0;
GF_ASSERT(snap);
@@ -1174,30 +1066,34 @@ glusterd_store_snapinfo_write(glusterd_snap_t *snap)
if (fd <= 0)
goto out;
- ret = gf_store_save_value(fd, GLUSTERD_STORE_KEY_SNAP_ID,
- uuid_utoa(snap->snap_id));
- if (ret)
- goto out;
-
- snprintf(buf, sizeof(buf), "%d", snap->snap_status);
- ret = gf_store_save_value(fd, GLUSTERD_STORE_KEY_SNAP_STATUS, buf);
- if (ret)
- goto out;
-
- snprintf(buf, sizeof(buf), "%d", snap->snap_restored);
- ret = gf_store_save_value(fd, GLUSTERD_STORE_KEY_SNAP_RESTORED, buf);
- if (ret)
+ ret = snprintf(buf + total_len, sizeof(buf) - total_len,
+ "%s=%s\n%s=%d\n%s=%d\n", GLUSTERD_STORE_KEY_SNAP_ID,
+ uuid_utoa(snap->snap_id), GLUSTERD_STORE_KEY_SNAP_STATUS,
+ snap->snap_status, GLUSTERD_STORE_KEY_SNAP_RESTORED,
+ snap->snap_restored);
+ if (ret < 0 || ret >= sizeof(buf) - total_len) {
+ ret = -1;
goto out;
+ }
+ total_len += ret;
if (snap->description) {
- ret = gf_store_save_value(fd, GLUSTERD_STORE_KEY_SNAP_DESC,
- snap->description);
- if (ret)
+ ret = snprintf(buf + total_len, sizeof(buf) - total_len, "%s=%s\n",
+ GLUSTERD_STORE_KEY_SNAP_DESC, snap->description);
+ if (ret < 0 || ret >= sizeof(buf) - total_len) {
+ ret = -1;
goto out;
+ }
+ total_len += ret;
}
- snprintf(buf, sizeof(buf), "%ld", snap->time_stamp);
- ret = gf_store_save_value(fd, GLUSTERD_STORE_KEY_SNAP_TIMESTAMP, buf);
+ ret = snprintf(buf + total_len, sizeof(buf) - total_len, "%s=%ld\n",
+ GLUSTERD_STORE_KEY_SNAP_TIMESTAMP, snap->time_stamp);
+ if (ret < 0 || ret >= sizeof(buf) - total_len) {
+ ret = -1;
+ goto out;
+ }
+ ret = gf_store_save_items(fd, buf);
out:
gf_msg_debug(THIS->name, 0, "Returning %d", ret);
@@ -1360,112 +1256,34 @@ glusterd_store_create_snap_shandle_on_absence(glusterd_snap_t *snap)
return ret;
}
-int32_t
+static int32_t
glusterd_store_brickinfos(glusterd_volinfo_t *volinfo, int vol_fd)
{
int32_t ret = 0;
glusterd_brickinfo_t *brickinfo = NULL;
+ glusterd_brickinfo_t *ta_brickinfo = NULL;
int32_t brick_count = 0;
+ int32_t ta_brick_count = 0;
GF_ASSERT(volinfo);
cds_list_for_each_entry(brickinfo, &volinfo->bricks, brick_list)
{
- ret = glusterd_store_brickinfo(volinfo, brickinfo, brick_count, vol_fd);
+ ret = glusterd_store_brickinfo(volinfo, brickinfo, brick_count, vol_fd,
+ 0);
if (ret)
goto out;
brick_count++;
}
-out:
- gf_msg_debug(THIS->name, 0, "Returning %d", ret);
- return ret;
-}
-
-int
-_gd_store_rebalance_dict(dict_t *dict, char *key, data_t *value, void *data)
-{
- int ret = -1;
- int fd = 0;
-
- fd = *(int *)data;
-
- ret = gf_store_save_value(fd, key, value->data);
-
- return ret;
-}
-
-int32_t
-glusterd_store_state_tier_write(int fd, glusterd_volinfo_t *volinfo)
-{
- int ret = -1;
- char buf[PATH_MAX] = {
- 0,
- };
-
- GF_VALIDATE_OR_GOTO(THIS->name, (fd > 0), out);
- GF_VALIDATE_OR_GOTO(THIS->name, volinfo, out);
-
- /*tier counter values are stored here. so that after restart
- * of glusterd tier resumes at the state is was brought down
- */
-
- if (volinfo->tier.defrag_cmd == GF_DEFRAG_CMD_STATUS) {
- ret = 0;
- goto out;
+ if (volinfo->thin_arbiter_count == 1) {
+ ta_brickinfo = list_first_entry(&volinfo->ta_bricks,
+ glusterd_brickinfo_t, brick_list);
+ ret = glusterd_store_brickinfo(volinfo, ta_brickinfo, ta_brick_count,
+ vol_fd, 1);
+ if (ret)
+ goto out;
}
- snprintf(buf, sizeof(buf), "%d", volinfo->tier.defrag_status);
- ret = gf_store_save_value(fd, GLUSTERD_STORE_KEY_VOL_TIER_STATUS, buf);
- if (ret)
- goto out;
-
- snprintf(buf, sizeof(buf), "%d", volinfo->tier.op);
- ret = gf_store_save_value(fd, GLUSTERD_STORE_KEY_TIER_DETACH_OP, buf);
- if (ret)
- goto out;
-
- snprintf(buf, sizeof(buf), "%" PRIu64, volinfo->tier.rebalance_files);
- ret = gf_store_save_value(fd, GLUSTERD_STORE_KEY_VOL_MIGRATED_FILES, buf);
- if (ret)
- goto out;
-
- snprintf(buf, sizeof(buf), "%" PRIu64, volinfo->tier.rebalance_data);
- ret = gf_store_save_value(fd, GLUSTERD_STORE_KEY_VOL_MIGRATED_SIZE, buf);
- if (ret)
- goto out;
-
- snprintf(buf, sizeof(buf), "%" PRIu64, volinfo->tier.lookedup_files);
- ret = gf_store_save_value(fd, GLUSTERD_STORE_KEY_VOL_MIGRATIONS_SCANNED,
- buf);
- if (ret)
- goto out;
-
- snprintf(buf, sizeof(buf), "%" PRIu64, volinfo->tier.rebalance_failures);
- ret = gf_store_save_value(fd, GLUSTERD_STORE_KEY_VOL_MIGRATIONS_FAILURES,
- buf);
- if (ret)
- goto out;
-
- snprintf(buf, sizeof(buf), "%" PRIu64, volinfo->tier.skipped_files);
- ret = gf_store_save_value(fd, GLUSTERD_STORE_KEY_VOL_MIGRATIONS_SKIPPED,
- buf);
- if (ret)
- goto out;
-
- snprintf(buf, sizeof(buf), "%f", volinfo->tier.rebalance_time);
- ret = gf_store_save_value(fd, GLUSTERD_STORE_KEY_VOL_MIGRATION_RUN_TIME,
- buf);
- if (ret)
- goto out;
-
- gf_uuid_unparse(volinfo->tier.rebalance_id, buf);
- ret = gf_store_save_value(fd, GF_TIER_TID_KEY, buf);
- if (ret)
- goto out;
-
- if (volinfo->tier.dict) {
- dict_foreach(volinfo->tier.dict, _gd_store_rebalance_dict, &fd);
- }
out:
gf_msg_debug(THIS->name, 0, "Returning %d", ret);
return ret;
@@ -1475,9 +1293,15 @@ int32_t
glusterd_store_node_state_write(int fd, glusterd_volinfo_t *volinfo)
{
int ret = -1;
- char buf[PATH_MAX] = {
- 0,
- };
+ char buf[PATH_MAX];
+ char uuid[UUID_SIZE + 1];
+ uint total_len = 0;
+ glusterd_volinfo_data_store_t *dict_data = NULL;
+ gf_store_handle_t shandle;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
GF_ASSERT(fd > 0);
GF_ASSERT(volinfo);
@@ -1487,61 +1311,63 @@ glusterd_store_node_state_write(int fd, glusterd_volinfo_t *volinfo)
goto out;
}
- snprintf(buf, sizeof(buf), "%d", volinfo->rebal.defrag_cmd);
- ret = gf_store_save_value(fd, GLUSTERD_STORE_KEY_VOL_DEFRAG, buf);
- if (ret)
- goto out;
-
- snprintf(buf, sizeof(buf), "%d", volinfo->rebal.defrag_status);
- ret = gf_store_save_value(fd, GLUSTERD_STORE_KEY_VOL_DEFRAG_STATUS, buf);
- if (ret)
- goto out;
-
- snprintf(buf, sizeof(buf), "%d", volinfo->rebal.op);
- ret = gf_store_save_value(fd, GLUSTERD_STORE_KEY_DEFRAG_OP, buf);
- if (ret)
- goto out;
-
- gf_uuid_unparse(volinfo->rebal.rebalance_id, buf);
- ret = gf_store_save_value(fd, GF_REBALANCE_TID_KEY, buf);
- if (ret)
- goto out;
-
- snprintf(buf, sizeof(buf), "%" PRIu64, volinfo->rebal.rebalance_files);
- ret = gf_store_save_value(fd, GLUSTERD_STORE_KEY_VOL_DEFRAG_REB_FILES, buf);
- if (ret)
- goto out;
-
- snprintf(buf, sizeof(buf), "%" PRIu64, volinfo->rebal.rebalance_data);
- ret = gf_store_save_value(fd, GLUSTERD_STORE_KEY_VOL_DEFRAG_SIZE, buf);
- if (ret)
- goto out;
-
- snprintf(buf, sizeof(buf), "%" PRIu64, volinfo->rebal.lookedup_files);
- ret = gf_store_save_value(fd, GLUSTERD_STORE_KEY_VOL_DEFRAG_SCANNED, buf);
- if (ret)
- goto out;
-
- snprintf(buf, sizeof(buf), "%" PRIu64, volinfo->rebal.rebalance_failures);
- ret = gf_store_save_value(fd, GLUSTERD_STORE_KEY_VOL_DEFRAG_FAILURES, buf);
- if (ret)
+ gf_uuid_unparse(volinfo->rebal.rebalance_id, uuid);
+ ret = snprintf(buf + total_len, sizeof(buf) - total_len,
+ "%s=%d\n%s=%d\n%s=%d\n%s=%s\n",
+ GLUSTERD_STORE_KEY_VOL_DEFRAG, volinfo->rebal.defrag_cmd,
+ GLUSTERD_STORE_KEY_VOL_DEFRAG_STATUS,
+ volinfo->rebal.defrag_status, GLUSTERD_STORE_KEY_DEFRAG_OP,
+ volinfo->rebal.op, GF_REBALANCE_TID_KEY, uuid);
+ if (ret < 0 || ret >= sizeof(buf) - total_len) {
+ ret = -1;
goto out;
-
- snprintf(buf, sizeof(buf), "%" PRIu64, volinfo->rebal.skipped_files);
- ret = gf_store_save_value(fd, GLUSTERD_STORE_KEY_VOL_DEFRAG_SKIPPED, buf);
- if (ret)
+ }
+ total_len += ret;
+
+ ret = snprintf(
+ buf + total_len, sizeof(buf) - total_len,
+ "%s=%" PRIu64 "\n%s=%" PRIu64 "\n%s=%" PRIu64 "\n%s=%" PRIu64
+ "\n%s=%" PRIu64 "\n%s=%lf\n",
+ GLUSTERD_STORE_KEY_VOL_DEFRAG_REB_FILES, volinfo->rebal.rebalance_files,
+ GLUSTERD_STORE_KEY_VOL_DEFRAG_SIZE, volinfo->rebal.rebalance_data,
+ GLUSTERD_STORE_KEY_VOL_DEFRAG_SCANNED, volinfo->rebal.lookedup_files,
+ GLUSTERD_STORE_KEY_VOL_DEFRAG_FAILURES,
+ volinfo->rebal.rebalance_failures,
+ GLUSTERD_STORE_KEY_VOL_DEFRAG_SKIPPED, volinfo->rebal.skipped_files,
+ GLUSTERD_STORE_KEY_VOL_DEFRAG_RUN_TIME, volinfo->rebal.rebalance_time);
+ if (ret < 0 || ret >= sizeof(buf) - total_len) {
+ ret = -1;
goto out;
+ }
- snprintf(buf, sizeof(buf), "%lf", volinfo->rebal.rebalance_time);
- ret = gf_store_save_value(fd, GLUSTERD_STORE_KEY_VOL_DEFRAG_RUN_TIME, buf);
- if (ret)
+ ret = gf_store_save_items(fd, buf);
+ if (ret) {
goto out;
+ }
if (volinfo->rebal.dict) {
- dict_foreach(volinfo->rebal.dict, _gd_store_rebalance_dict, &fd);
+ dict_data = GF_CALLOC(1, sizeof(glusterd_volinfo_data_store_t),
+ gf_gld_mt_volinfo_dict_data_t);
+ if (dict_data == NULL) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_NO_MEMORY, NULL);
+ return -1;
+ }
+ dict_data->shandle = &shandle;
+ shandle.fd = fd;
+ dict_foreach(volinfo->rebal.dict, _storeopts, (void *)dict_data);
+ if (dict_data->buffer_len > 0) {
+ ret = gf_store_save_items(fd, dict_data->buffer);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_FILE_OP_FAILED,
+ NULL);
+ goto out;
+ ;
+ }
+ }
}
out:
- gf_msg_debug(THIS->name, 0, "Returning %d", ret);
+ GF_FREE(dict_data);
+ gf_msg_debug(this->name, 0, "Returning %d", ret);
return ret;
}
@@ -1562,12 +1388,6 @@ glusterd_store_perform_node_state_store(glusterd_volinfo_t *volinfo)
if (ret)
goto out;
- if (volinfo->type == GF_CLUSTER_TYPE_TIER) {
- ret = glusterd_store_state_tier_write(fd, volinfo);
- if (ret)
- goto out;
- }
-
ret = gf_store_rename_tmppath(volinfo->node_state_shandle);
if (ret)
goto out;
@@ -1579,7 +1399,7 @@ out:
return ret;
}
-int32_t
+static int32_t
glusterd_store_perform_volume_store(glusterd_volinfo_t *volinfo)
{
int fd = -1;
@@ -1596,6 +1416,10 @@ glusterd_store_perform_volume_store(glusterd_volinfo_t *volinfo)
if (ret)
goto out;
+ ret = glusterd_store_create_brick_dir(volinfo);
+ if (ret)
+ goto out;
+
ret = glusterd_store_brickinfos(volinfo, fd);
if (ret)
goto out;
@@ -1657,6 +1481,7 @@ glusterd_store_brickinfos_atomic_update(glusterd_volinfo_t *volinfo)
{
int ret = -1;
glusterd_brickinfo_t *brickinfo = NULL;
+ glusterd_brickinfo_t *ta_brickinfo = NULL;
GF_ASSERT(volinfo);
@@ -1666,6 +1491,15 @@ glusterd_store_brickinfos_atomic_update(glusterd_volinfo_t *volinfo)
if (ret)
goto out;
}
+
+ if (volinfo->thin_arbiter_count == 1) {
+ ta_brickinfo = list_first_entry(&volinfo->ta_bricks,
+ glusterd_brickinfo_t, brick_list);
+ ret = gf_store_rename_tmppath(ta_brickinfo->shandle);
+ if (ret)
+ goto out;
+ }
+
out:
return ret;
}
@@ -1781,11 +1615,8 @@ glusterd_store_volinfo(glusterd_volinfo_t *volinfo,
pthread_mutex_lock(&volinfo->store_volinfo_lock);
{
glusterd_perform_volinfo_version_action(volinfo, ac);
- ret = glusterd_store_create_volume_dir(volinfo);
- if (ret)
- goto unlock;
- ret = glusterd_store_create_volume_run_dir(volinfo);
+ ret = glusterd_store_create_volume_dirs(volinfo);
if (ret)
goto unlock;
@@ -1820,6 +1651,7 @@ glusterd_store_volinfo(glusterd_volinfo_t *volinfo,
unlock:
pthread_mutex_unlock(&volinfo->store_volinfo_lock);
pthread_mutex_unlock(&ctx->cleanup_lock);
+
if (ret)
glusterd_store_volume_cleanup_tmp(volinfo);
@@ -1869,7 +1701,7 @@ glusterd_store_delete_volume(glusterd_volinfo_t *volinfo)
goto out;
}
- ret = sys_mkdir(trashdir, 0777);
+ ret = sys_mkdir(trashdir, 0755);
if (ret && errno != EEXIST) {
gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_CREATE_DIR_FAILED,
"Failed to create trash "
@@ -1956,7 +1788,7 @@ glusterd_store_delete_snap(glusterd_snap_t *snap)
goto out;
}
- ret = sys_mkdir(trashdir, 0777);
+ ret = sys_mkdir(trashdir, 0755);
if (ret && errno != EEXIST) {
gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_CREATE_DIR_FAILED,
"Failed to create trash "
@@ -1981,8 +1813,9 @@ glusterd_store_delete_snap(glusterd_snap_t *snap)
goto out;
}
- GF_SKIP_IRRELEVANT_ENTRIES(entry, dir, scratch);
- while (entry) {
+ while ((entry = sys_readdir(dir, scratch))) {
+ if (gf_irrelevant_entry(entry))
+ continue;
len = snprintf(path, PATH_MAX, "%s/%s", delete_path, entry->d_name);
if ((len < 0) || (len >= PATH_MAX)) {
goto stat_failed;
@@ -2012,7 +1845,6 @@ glusterd_store_delete_snap(glusterd_snap_t *snap)
ret ? "Failed to remove" : "Removed", entry->d_name);
stat_failed:
memset(path, 0, sizeof(path));
- GF_SKIP_IRRELEVANT_ENTRIES(entry, dir, scratch);
}
ret = sys_closedir(dir);
@@ -2045,15 +1877,10 @@ glusterd_store_global_info(xlator_t *this)
{
int ret = -1;
glusterd_conf_t *conf = NULL;
- char op_version_str[15] = {
- 0,
- };
- char path[PATH_MAX] = {
- 0,
- };
+ char buf[PATH_MAX];
+ uint total_len = 0;
gf_store_handle_t *handle = NULL;
char *uuid_str = NULL;
- int32_t len = 0;
conf = this->private;
@@ -2062,12 +1889,13 @@ glusterd_store_global_info(xlator_t *this)
goto out;
if (!conf->handle) {
- len = snprintf(path, PATH_MAX, "%s/%s", conf->workdir,
+ ret = snprintf(buf, sizeof(buf), "%s/%s", conf->workdir,
GLUSTERD_INFO_FILE);
- if ((len < 0) || (len >= PATH_MAX)) {
+ if ((ret < 0) || (ret >= sizeof(buf))) {
+ ret = -1;
goto out;
}
- ret = gf_store_handle_new(path, &handle);
+ ret = gf_store_handle_new(buf, &handle);
if (ret) {
gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_STORE_HANDLE_GET_FAIL,
"Unable to get store handle");
@@ -2087,39 +1915,38 @@ glusterd_store_global_info(xlator_t *this)
}
handle->fd = gf_store_mkstemp(handle);
- if (handle->fd <= 0) {
+ if (handle->fd < 0) {
ret = -1;
goto out;
}
- pthread_mutex_lock(&conf->mutex);
- {
- ret = gf_store_save_value(handle->fd, GLUSTERD_STORE_UUID_KEY,
- uuid_str);
+
+ ret = snprintf(buf, sizeof(buf), "%s=%s\n", GLUSTERD_STORE_UUID_KEY,
+ uuid_str);
+ if (ret < 0 || ret >= sizeof(buf)) {
+ ret = -1;
+ goto out;
}
- pthread_mutex_unlock(&conf->mutex);
- if (ret) {
- gf_msg(this->name, GF_LOG_CRITICAL, 0, GD_MSG_UUID_SET_FAIL,
- "Storing uuid failed ret = %d", ret);
+ total_len += ret;
+
+ ret = snprintf(buf + total_len, sizeof(buf) - total_len, "%s=%d\n",
+ GD_OP_VERSION_KEY, conf->op_version);
+ if (ret < 0 || ret >= sizeof(buf) - total_len) {
+ ret = -1;
goto out;
}
- snprintf(op_version_str, 15, "%d", conf->op_version);
- ret = gf_store_save_value(handle->fd, GD_OP_VERSION_KEY, op_version_str);
+ ret = gf_store_save_items(handle->fd, buf);
if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_OP_VERS_STORE_FAIL,
- "Storing op-version failed ret = %d", ret);
+ gf_msg(this->name, GF_LOG_CRITICAL, 0, GD_MSG_OP_VERS_STORE_FAIL,
+ "Storing glusterd global-info failed ret = %d", ret);
goto out;
}
ret = gf_store_rename_tmppath(handle);
out:
if (handle) {
- if (ret && (handle->fd > 0))
+ if (ret && (handle->fd >= 0))
gf_store_unlink_tmppath(handle);
-
- if (handle->fd > 0) {
- handle->fd = 0;
- }
}
if (uuid_str)
@@ -2134,7 +1961,74 @@ out:
}
int
-glusterd_retrieve_op_version(xlator_t *this, int *op_version)
+glusterd_store_max_op_version(xlator_t *this)
+{
+ int ret = -1;
+ glusterd_conf_t *conf = NULL;
+ char op_version_str[15] = {
+ 0,
+ };
+ char path[PATH_MAX] = {
+ 0,
+ };
+ gf_store_handle_t *handle = NULL;
+ int32_t len = 0;
+
+ conf = this->private;
+
+ len = snprintf(path, PATH_MAX, "%s/%s", conf->workdir,
+ GLUSTERD_UPGRADE_FILE);
+ if ((len < 0) || (len >= PATH_MAX)) {
+ goto out;
+ }
+ ret = gf_store_handle_new(path, &handle);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_STORE_HANDLE_GET_FAIL,
+ "Unable to get store handle");
+ goto out;
+ }
+
+ /* These options need to be available for all users */
+ ret = sys_chmod(handle->path, 0644);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_FILE_OP_FAILED,
+ "chmod error for %s", GLUSTERD_UPGRADE_FILE);
+ goto out;
+ }
+
+ handle->fd = gf_store_mkstemp(handle);
+ if (handle->fd < 0) {
+ ret = -1;
+ goto out;
+ }
+
+ snprintf(op_version_str, sizeof(op_version_str), "%d", GD_OP_VERSION_MAX);
+ ret = gf_store_save_value(handle->fd, GD_MAX_OP_VERSION_KEY,
+ op_version_str);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_OP_VERS_STORE_FAIL,
+ "Storing op-version failed ret = %d", ret);
+ goto out;
+ }
+
+ ret = gf_store_rename_tmppath(handle);
+out:
+ if (handle) {
+ if (ret && (handle->fd >= 0))
+ gf_store_unlink_tmppath(handle);
+ }
+
+ if (ret)
+ gf_msg(this->name, GF_LOG_ERROR, 0,
+ GD_MSG_GLUSTERD_GLOBAL_INFO_STORE_FAIL,
+ "Failed to store max op-version");
+ if (handle)
+ gf_store_handle_destroy(handle);
+ return ret;
+}
+
+int
+glusterd_retrieve_max_op_version(xlator_t *this, int *op_version)
{
char *op_version_str = NULL;
glusterd_conf_t *priv = NULL;
@@ -2149,25 +2043,21 @@ glusterd_retrieve_op_version(xlator_t *this, int *op_version)
priv = this->private;
- if (!priv->handle) {
- len = snprintf(path, PATH_MAX, "%s/%s", priv->workdir,
- GLUSTERD_INFO_FILE);
- if ((len < 0) || (len >= PATH_MAX)) {
- goto out;
- }
- ret = gf_store_handle_retrieve(path, &handle);
-
- if (ret) {
- gf_msg_debug(this->name, 0,
- "Unable to get store "
- "handle!");
- goto out;
- }
+ len = snprintf(path, PATH_MAX, "%s/%s", priv->workdir,
+ GLUSTERD_UPGRADE_FILE);
+ if ((len < 0) || (len >= PATH_MAX)) {
+ goto out;
+ }
+ ret = gf_store_handle_retrieve(path, &handle);
- priv->handle = handle;
+ if (ret) {
+ gf_msg_debug(this->name, 0,
+ "Unable to get store "
+ "handle!");
+ goto out;
}
- ret = gf_store_retrieve_value(priv->handle, GD_OP_VERSION_KEY,
+ ret = gf_store_retrieve_value(handle, GD_MAX_OP_VERSION_KEY,
&op_version_str);
if (ret) {
gf_msg_debug(this->name, 0, "No previous op_version present");
@@ -2187,17 +2077,18 @@ glusterd_retrieve_op_version(xlator_t *this, int *op_version)
out:
if (op_version_str)
GF_FREE(op_version_str);
-
+ if (handle)
+ gf_store_handle_destroy(handle);
return ret;
}
int
-glusterd_retrieve_sys_snap_max_limit(xlator_t *this, uint64_t *limit, char *key)
+glusterd_retrieve_op_version(xlator_t *this, int *op_version)
{
- char *limit_str = NULL;
+ char *op_version_str = NULL;
glusterd_conf_t *priv = NULL;
int ret = -1;
- uint64_t tmp_limit = 0;
+ int tmp_version = 0;
char *tmp = NULL;
char path[PATH_MAX] = {
0,
@@ -2205,13 +2096,8 @@ glusterd_retrieve_sys_snap_max_limit(xlator_t *this, uint64_t *limit, char *key)
gf_store_handle_t *handle = NULL;
int32_t len = 0;
- GF_ASSERT(this);
priv = this->private;
- GF_ASSERT(priv);
- GF_ASSERT(limit);
- GF_ASSERT(key);
-
if (!priv->handle) {
len = snprintf(path, PATH_MAX, "%s/%s", priv->workdir,
GLUSTERD_INFO_FILE);
@@ -2230,25 +2116,26 @@ glusterd_retrieve_sys_snap_max_limit(xlator_t *this, uint64_t *limit, char *key)
priv->handle = handle;
}
- ret = gf_store_retrieve_value(priv->handle, key, &limit_str);
+ ret = gf_store_retrieve_value(priv->handle, GD_OP_VERSION_KEY,
+ &op_version_str);
if (ret) {
- gf_msg_debug(this->name, 0, "No previous %s present", key);
+ gf_msg_debug(this->name, 0, "No previous op_version present");
goto out;
}
- tmp_limit = strtoul(limit_str, &tmp, 10);
- if ((tmp_limit <= 0) || (tmp && strlen(tmp) > 1)) {
+ tmp_version = strtol(op_version_str, &tmp, 10);
+ if ((tmp_version <= 0) || (tmp && strlen(tmp) > 1)) {
gf_msg(this->name, GF_LOG_WARNING, EINVAL, GD_MSG_UNSUPPORTED_VERSION,
"invalid version number");
goto out;
}
- *limit = tmp_limit;
+ *op_version = tmp_version;
ret = 0;
out:
- if (limit_str)
- GF_FREE(limit_str);
+ if (op_version_str)
+ GF_FREE(op_version_str);
return ret;
}
@@ -2454,7 +2341,7 @@ glusterd_store_retrieve_snapd(glusterd_volinfo_t *volinfo)
ret = 0;
out:
- if (gf_store_iter_destroy(iter)) {
+ if (gf_store_iter_destroy(&iter)) {
gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_STORE_ITER_DESTROY_FAIL,
"Failed to destroy store iter");
ret = -1;
@@ -2468,6 +2355,7 @@ glusterd_store_retrieve_bricks(glusterd_volinfo_t *volinfo)
{
int32_t ret = 0;
glusterd_brickinfo_t *brickinfo = NULL;
+ glusterd_brickinfo_t *ta_brickinfo = NULL;
gf_store_iter_t *iter = NULL;
char *key = NULL;
char *value = NULL;
@@ -2479,7 +2367,8 @@ glusterd_store_retrieve_bricks(glusterd_volinfo_t *volinfo)
};
glusterd_conf_t *priv = NULL;
int32_t brick_count = 0;
- char tmpkey[4096] = {
+ int32_t ta_brick_count = 0;
+ char tmpkey[32] = {
0,
};
gf_store_iter_t *tmpiter = NULL;
@@ -2488,6 +2377,10 @@ glusterd_store_retrieve_bricks(glusterd_volinfo_t *volinfo)
struct pmap_registry *pmap = NULL;
xlator_t *this = NULL;
int brickid = 0;
+ /* ta_brick_id initialization with 2 since ta-brick id starts with
+ * volname-ta-2
+ */
+ int ta_brick_id = 2;
gf_store_op_errno_t op_errno = GD_STORE_SUCCESS;
int32_t len = 0;
@@ -2569,7 +2462,13 @@ glusterd_store_retrieve_bricks(glusterd_volinfo_t *volinfo)
}
} else if (!strncmp(key, GLUSTERD_STORE_KEY_BRICK_PORT,
SLEN(GLUSTERD_STORE_KEY_BRICK_PORT))) {
- gf_string2int(value, &brickinfo->port);
+ ret = gf_string2int(value, &brickinfo->port);
+ if (ret == -1) {
+ gf_msg(this->name, GF_LOG_ERROR, EINVAL,
+ GD_MSG_INCOMPATIBLE_VALUE,
+ "Failed to convert "
+ "string to integer");
+ }
if (brickinfo->port < priv->base_port) {
/* This is required to adhere to the
@@ -2584,7 +2483,13 @@ glusterd_store_retrieve_bricks(glusterd_volinfo_t *volinfo)
}
} else if (!strncmp(key, GLUSTERD_STORE_KEY_BRICK_RDMA_PORT,
SLEN(GLUSTERD_STORE_KEY_BRICK_RDMA_PORT))) {
- gf_string2int(value, &brickinfo->rdma_port);
+ ret = gf_string2int(value, &brickinfo->rdma_port);
+ if (ret == -1) {
+ gf_msg(this->name, GF_LOG_ERROR, EINVAL,
+ GD_MSG_INCOMPATIBLE_VALUE,
+ "Failed to convert "
+ "string to integer");
+ }
if (brickinfo->rdma_port < priv->base_port) {
/* This is required to adhere to the
@@ -2760,25 +2665,203 @@ glusterd_store_retrieve_bricks(glusterd_volinfo_t *volinfo)
brickinfo->path);
/* No need for treating it as an error, lets continue
with just a message */
+ } else {
+ brickinfo->statfs_fsid = brickstat.f_fsid;
}
- brickinfo->statfs_fsid = brickstat.f_fsid;
}
cds_list_add_tail(&brickinfo->brick_list, &volinfo->bricks);
brick_count++;
}
+ if (gf_store_iter_destroy(&tmpiter)) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_STORE_ITER_DESTROY_FAIL,
+ "Failed to destroy store iter");
+ ret = -1;
+ goto out;
+ }
+
+ ret = gf_store_iter_new(volinfo->shandle, &tmpiter);
+
+ if (ret)
+ goto out;
+
+ if (volinfo->thin_arbiter_count == 1) {
+ snprintf(tmpkey, sizeof(tmpkey), "%s-%d",
+ GLUSTERD_STORE_KEY_VOL_TA_BRICK, 0);
+ while (ta_brick_count < volinfo->subvol_count) {
+ ret = glusterd_brickinfo_new(&ta_brickinfo);
+ if (ret)
+ goto out;
+
+ ret = gf_store_iter_get_matching(tmpiter, tmpkey, &tmpvalue);
+
+ len = snprintf(path, sizeof(path), "%s/%s", brickdir, tmpvalue);
+ GF_FREE(tmpvalue);
+ tmpvalue = NULL;
+ if ((len < 0) || (len >= sizeof(path))) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = gf_store_handle_retrieve(path, &ta_brickinfo->shandle);
+
+ if (ret)
+ goto out;
+
+ ret = gf_store_iter_new(ta_brickinfo->shandle, &iter);
+
+ if (ret)
+ goto out;
+
+ ret = gf_store_iter_get_next(iter, &key, &value, &op_errno);
+ if (ret) {
+ gf_msg("glusterd", GF_LOG_ERROR, op_errno,
+ GD_MSG_STORE_ITER_GET_FAIL,
+ "Unable to iterate "
+ "the store for brick: %s",
+ path);
+ goto out;
+ }
+
+ while (!ret) {
+ if (!strncmp(key, GLUSTERD_STORE_KEY_BRICK_HOSTNAME,
+ SLEN(GLUSTERD_STORE_KEY_BRICK_HOSTNAME))) {
+ if (snprintf(ta_brickinfo->hostname,
+ sizeof(ta_brickinfo->hostname), "%s",
+ value) >= sizeof(ta_brickinfo->hostname)) {
+ gf_msg("glusterd", GF_LOG_ERROR, op_errno,
+ GD_MSG_PARSE_BRICKINFO_FAIL,
+ "brick hostname truncated: %s",
+ ta_brickinfo->hostname);
+ goto out;
+ }
+ } else if (!strncmp(key, GLUSTERD_STORE_KEY_BRICK_PATH,
+ SLEN(GLUSTERD_STORE_KEY_BRICK_PATH))) {
+ if (snprintf(ta_brickinfo->path, sizeof(ta_brickinfo->path),
+ "%s", value) >= sizeof(ta_brickinfo->path)) {
+ gf_msg("glusterd", GF_LOG_ERROR, op_errno,
+ GD_MSG_PARSE_BRICKINFO_FAIL,
+ "brick path truncated: %s", ta_brickinfo->path);
+ goto out;
+ }
+ } else if (!strncmp(key, GLUSTERD_STORE_KEY_BRICK_REAL_PATH,
+ SLEN(GLUSTERD_STORE_KEY_BRICK_REAL_PATH))) {
+ if (snprintf(ta_brickinfo->real_path,
+ sizeof(ta_brickinfo->real_path), "%s",
+ value) >= sizeof(ta_brickinfo->real_path)) {
+ gf_msg("glusterd", GF_LOG_ERROR, op_errno,
+ GD_MSG_PARSE_BRICKINFO_FAIL,
+ "real_path truncated: %s",
+ ta_brickinfo->real_path);
+ goto out;
+ }
+ } else if (!strncmp(key, GLUSTERD_STORE_KEY_BRICK_PORT,
+ SLEN(GLUSTERD_STORE_KEY_BRICK_PORT))) {
+ ret = gf_string2int(value, &ta_brickinfo->port);
+ if (ret == -1) {
+ gf_msg(this->name, GF_LOG_ERROR, EINVAL,
+ GD_MSG_INCOMPATIBLE_VALUE,
+ "Failed to convert "
+ "string to integer");
+ }
+
+ if (ta_brickinfo->port < priv->base_port) {
+ /* This is required to adhere to the
+ IANA standards */
+ ta_brickinfo->port = 0;
+ }
+ } else if (!strncmp(key, GLUSTERD_STORE_KEY_BRICK_RDMA_PORT,
+ SLEN(GLUSTERD_STORE_KEY_BRICK_RDMA_PORT))) {
+ ret = gf_string2int(value, &ta_brickinfo->rdma_port);
+ if (ret == -1) {
+ gf_msg(this->name, GF_LOG_ERROR, EINVAL,
+ GD_MSG_INCOMPATIBLE_VALUE,
+ "Failed to convert "
+ "string to integer");
+ }
+
+ if (ta_brickinfo->rdma_port < priv->base_port) {
+ /* This is required to adhere to the
+ IANA standards */
+ ta_brickinfo->rdma_port = 0;
+ }
+ } else if (!strncmp(
+ key, GLUSTERD_STORE_KEY_BRICK_DECOMMISSIONED,
+ SLEN(GLUSTERD_STORE_KEY_BRICK_DECOMMISSIONED))) {
+ ret = gf_string2int(value, &ta_brickinfo->decommissioned);
+ if (ret == -1) {
+ gf_msg(this->name, GF_LOG_ERROR, EINVAL,
+ GD_MSG_INCOMPATIBLE_VALUE,
+ "Failed to convert "
+ "string to integer");
+ }
+
+ } else if (!strcmp(key, GLUSTERD_STORE_KEY_BRICK_ID)) {
+ if (snprintf(ta_brickinfo->brick_id,
+ sizeof(ta_brickinfo->brick_id), "%s",
+ value) >= sizeof(ta_brickinfo->brick_id)) {
+ gf_msg("glusterd", GF_LOG_ERROR, op_errno,
+ GD_MSG_PARSE_BRICKINFO_FAIL,
+ "brick_id truncated: %s",
+ ta_brickinfo->brick_id);
+ goto out;
+ }
+ } else if (!strncmp(key, GLUSTERD_STORE_KEY_BRICK_FSID,
+ SLEN(GLUSTERD_STORE_KEY_BRICK_FSID))) {
+ ret = gf_string2uint64(value, &ta_brickinfo->statfs_fsid);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0,
+ GD_MSG_INVALID_ENTRY,
+ "%s "
+ "is not a valid uint64_t value",
+ value);
+ }
+ } else if (!strcmp(key, GLUSTERD_STORE_KEY_BRICK_UUID)) {
+ gf_uuid_parse(value, brickinfo->uuid);
+ } else if (!strncmp(
+ key, GLUSTERD_STORE_KEY_BRICK_SNAP_STATUS,
+ SLEN(GLUSTERD_STORE_KEY_BRICK_SNAP_STATUS))) {
+ ret = gf_string2int(value, &ta_brickinfo->snap_status);
+ if (ret == -1) {
+ gf_msg(this->name, GF_LOG_ERROR, EINVAL,
+ GD_MSG_INCOMPATIBLE_VALUE,
+ "Failed to convert "
+ "string to integer");
+ }
+
+ } else {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_UNKNOWN_KEY,
+ "Unknown key: %s", key);
+ }
+
+ GF_FREE(key);
+ GF_FREE(value);
+ key = NULL;
+ value = NULL;
+ ret = gf_store_iter_get_next(iter, &key, &value, &op_errno);
+ }
+
+ GLUSTERD_ASSIGN_BRICKID_TO_TA_BRICKINFO(ta_brickinfo, volinfo,
+ ta_brick_id);
+ ta_brick_id += 3;
+
+ cds_list_add_tail(&ta_brickinfo->brick_list, &volinfo->ta_bricks);
+ ta_brick_count++;
+ }
+ }
+
assign_brick_groups(volinfo);
ret = 0;
out:
- if (gf_store_iter_destroy(tmpiter)) {
+ if (gf_store_iter_destroy(&tmpiter)) {
gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_STORE_ITER_DESTROY_FAIL,
"Failed to destroy store iter");
ret = -1;
}
- if (gf_store_iter_destroy(iter)) {
+ if (gf_store_iter_destroy(&iter)) {
gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_STORE_ITER_DESTROY_FAIL,
"Failed to destroy store iter");
ret = -1;
@@ -2851,52 +2934,22 @@ glusterd_store_retrieve_node_state(glusterd_volinfo_t *volinfo)
volinfo->rebal.op = atoi(value);
} else if (!strncmp(key, GLUSTERD_STORE_KEY_VOL_DEFRAG_REB_FILES,
SLEN(GLUSTERD_STORE_KEY_VOL_DEFRAG_REB_FILES))) {
- volinfo->rebal.rebalance_files = atoi(value);
+ sscanf(value, "%" PRIu64, &volinfo->rebal.rebalance_files);
} else if (!strncmp(key, GLUSTERD_STORE_KEY_VOL_DEFRAG_SIZE,
SLEN(GLUSTERD_STORE_KEY_VOL_DEFRAG_SIZE))) {
- volinfo->rebal.rebalance_data = atoi(value);
+ sscanf(value, "%" PRIu64, &volinfo->rebal.rebalance_data);
} else if (!strncmp(key, GLUSTERD_STORE_KEY_VOL_DEFRAG_SCANNED,
SLEN(GLUSTERD_STORE_KEY_VOL_DEFRAG_SCANNED))) {
- volinfo->rebal.lookedup_files = atoi(value);
+ sscanf(value, "%" PRIu64, &volinfo->rebal.lookedup_files);
} else if (!strncmp(key, GLUSTERD_STORE_KEY_VOL_DEFRAG_FAILURES,
SLEN(GLUSTERD_STORE_KEY_VOL_DEFRAG_FAILURES))) {
- volinfo->rebal.rebalance_failures = atoi(value);
+ sscanf(value, "%" PRIu64, &volinfo->rebal.rebalance_failures);
} else if (!strncmp(key, GLUSTERD_STORE_KEY_VOL_DEFRAG_SKIPPED,
SLEN(GLUSTERD_STORE_KEY_VOL_DEFRAG_SKIPPED))) {
- volinfo->rebal.skipped_files = atoi(value);
+ sscanf(value, "%" PRIu64, &volinfo->rebal.skipped_files);
} else if (!strncmp(key, GLUSTERD_STORE_KEY_VOL_DEFRAG_RUN_TIME,
SLEN(GLUSTERD_STORE_KEY_VOL_DEFRAG_RUN_TIME))) {
volinfo->rebal.rebalance_time = atoi(value);
-
- /* if none of the above keys match then its related to tier
- * so we get the values and store it on volinfo->tier
- */
- } else if (!strncmp(key, GLUSTERD_STORE_KEY_VOL_TIER_STATUS,
- SLEN(GLUSTERD_STORE_KEY_VOL_TIER_STATUS))) {
- volinfo->tier.defrag_status = atoi(value);
- } else if (!strncmp(key, GF_TIER_TID_KEY, SLEN(GF_TIER_TID_KEY))) {
- gf_uuid_parse(value, volinfo->tier.rebalance_id);
- } else if (!strncmp(key, GLUSTERD_STORE_KEY_TIER_DETACH_OP,
- SLEN(GLUSTERD_STORE_KEY_TIER_DETACH_OP))) {
- volinfo->tier.op = atoi(value);
- } else if (!strncmp(key, GLUSTERD_STORE_KEY_VOL_MIGRATED_FILES,
- SLEN(GLUSTERD_STORE_KEY_VOL_MIGRATED_FILES))) {
- volinfo->tier.rebalance_files = atoi(value);
- } else if (!strncmp(key, GLUSTERD_STORE_KEY_VOL_MIGRATED_SIZE,
- SLEN(GLUSTERD_STORE_KEY_VOL_MIGRATED_SIZE))) {
- volinfo->tier.rebalance_data = atoi(value);
- } else if (!strncmp(key, GLUSTERD_STORE_KEY_VOL_MIGRATIONS_SCANNED,
- SLEN(GLUSTERD_STORE_KEY_VOL_MIGRATIONS_SCANNED))) {
- volinfo->tier.lookedup_files = atoi(value);
- } else if (!strncmp(key, GLUSTERD_STORE_KEY_VOL_MIGRATIONS_FAILURES,
- SLEN(GLUSTERD_STORE_KEY_VOL_MIGRATIONS_FAILURES))) {
- volinfo->tier.rebalance_failures = atoi(value);
- } else if (!strncmp(key, GLUSTERD_STORE_KEY_VOL_MIGRATIONS_SKIPPED,
- SLEN(GLUSTERD_STORE_KEY_VOL_MIGRATIONS_SKIPPED))) {
- volinfo->tier.skipped_files = atoi(value);
- } else if (!strncmp(key, GLUSTERD_STORE_KEY_VOL_MIGRATION_RUN_TIME,
- SLEN(GLUSTERD_STORE_KEY_VOL_MIGRATION_RUN_TIME))) {
- volinfo->tier.rebalance_time = atoi(value);
} else {
if (!tmp_dict) {
tmp_dict = dict_new();
@@ -2930,10 +2983,7 @@ glusterd_store_retrieve_node_state(glusterd_volinfo_t *volinfo)
ret = gf_store_iter_get_next(iter, &key, &value, &op_errno);
}
if (tmp_dict) {
- if (volinfo->type == GF_CLUSTER_TYPE_TIER)
- volinfo->tier.dict = dict_ref(tmp_dict);
- else
- volinfo->rebal.dict = dict_ref(tmp_dict);
+ volinfo->rebal.dict = dict_ref(tmp_dict);
}
if (op_errno != GD_STORE_EOF) {
@@ -2944,7 +2994,7 @@ glusterd_store_retrieve_node_state(glusterd_volinfo_t *volinfo)
ret = 0;
out:
- if (gf_store_iter_destroy(iter)) {
+ if (gf_store_iter_destroy(&iter)) {
gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_STORE_ITER_DESTROY_FAIL,
"Failed to destroy store iter");
ret = -1;
@@ -2955,8 +3005,6 @@ out:
if (ret) {
if (volinfo->rebal.dict)
dict_unref(volinfo->rebal.dict);
- else if (volinfo->tier.dict)
- dict_unref(volinfo->tier.dict);
}
if (tmp_dict)
dict_unref(tmp_dict);
@@ -3049,6 +3097,8 @@ glusterd_store_update_volinfo(glusterd_volinfo_t *volinfo)
volinfo->replica_count = atoi(value);
} else if (!strcmp(key, GLUSTERD_STORE_KEY_VOL_ARBITER_CNT)) {
volinfo->arbiter_count = atoi(value);
+ } else if (!strcmp(key, GLUSTERD_STORE_KEY_VOL_THIN_ARBITER_CNT)) {
+ volinfo->thin_arbiter_count = atoi(value);
} else if (!strncmp(key, GLUSTERD_STORE_KEY_VOL_DISPERSE_CNT,
SLEN(GLUSTERD_STORE_KEY_VOL_DISPERSE_CNT))) {
volinfo->disperse_count = atoi(value);
@@ -3093,9 +3143,6 @@ glusterd_store_update_volinfo(glusterd_volinfo_t *volinfo)
} else if (!strncmp(key, GLUSTERD_STORE_KEY_VOL_CLIENT_OP_VERSION,
SLEN(GLUSTERD_STORE_KEY_VOL_CLIENT_OP_VERSION))) {
volinfo->client_op_version = atoi(value);
- } else if (!strncmp(key, GLUSTERD_STORE_KEY_VOL_CAPS,
- SLEN(GLUSTERD_STORE_KEY_VOL_CAPS))) {
- volinfo->caps = atoi(value);
} else if (!strncmp(key, GLUSTERD_STORE_KEY_SNAP_MAX_HARD_LIMIT,
SLEN(GLUSTERD_STORE_KEY_SNAP_MAX_HARD_LIMIT))) {
volinfo->snap_max_hard_limit = (uint64_t)atoll(value);
@@ -3115,28 +3162,6 @@ glusterd_store_update_volinfo(glusterd_volinfo_t *volinfo)
"parent_volname truncated: %s", volinfo->parent_volname);
goto out;
}
- } else if (!strncmp(key, GF_TIER_ENABLED, SLEN(GF_TIER_ENABLED))) {
- volinfo->is_tier_enabled = atoi(value);
- } else if (!strncmp(key, GLUSTERD_STORE_KEY_COLD_COUNT, strlen(key))) {
- volinfo->tier_info.cold_brick_count = atoi(value);
- } else if (!strncmp(key, GLUSTERD_STORE_KEY_COLD_REPLICA_COUNT,
- strlen(key))) {
- volinfo->tier_info.cold_replica_count = atoi(value);
- } else if (!strncmp(key, GLUSTERD_STORE_KEY_COLD_DISPERSE_COUNT,
- strlen(key))) {
- volinfo->tier_info.cold_disperse_count = atoi(value);
- } else if (!strncmp(key, GLUSTERD_STORE_KEY_COLD_REDUNDANCY_COUNT,
- strlen(key))) {
- volinfo->tier_info.cold_redundancy_count = atoi(value);
- } else if (!strncmp(key, GLUSTERD_STORE_KEY_HOT_COUNT, strlen(key))) {
- volinfo->tier_info.hot_brick_count = atoi(value);
- } else if (!strncmp(key, GLUSTERD_STORE_KEY_HOT_REPLICA_COUNT,
- strlen(key))) {
- volinfo->tier_info.hot_replica_count = atoi(value);
- } else if (!strncmp(key, GLUSTERD_STORE_KEY_HOT_TYPE, strlen(key))) {
- volinfo->tier_info.hot_type = atoi(value);
- } else if (!strncmp(key, GLUSTERD_STORE_KEY_COLD_TYPE, strlen(key))) {
- volinfo->tier_info.cold_type = atoi(value);
} else if (!strncmp(key, GLUSTERD_STORE_KEY_VOL_QUOTA_VERSION,
SLEN(GLUSTERD_STORE_KEY_VOL_QUOTA_VERSION))) {
volinfo->quota_xattr_version = atoi(value);
@@ -3155,8 +3180,11 @@ glusterd_store_update_volinfo(glusterd_volinfo_t *volinfo)
case 0:
/*Ignore GLUSTERD_STORE_KEY_VOL_BRICK since
- glusterd_store_retrieve_bricks gets it later*/
- if (!strstr(key, GLUSTERD_STORE_KEY_VOL_BRICK))
+ glusterd_store_retrieve_bricks gets it later.
+ also, ignore tier-enabled key as we deprecated
+ tier xlator*/
+ if (!strstr(key, GLUSTERD_STORE_KEY_VOL_BRICK) ||
+ !strstr(key, GF_TIER_ENABLED))
gf_msg(this->name, GF_LOG_WARNING, 0,
GD_MSG_UNKNOWN_KEY, "Unknown key: %s", key);
break;
@@ -3212,15 +3240,13 @@ glusterd_store_update_volinfo(glusterd_volinfo_t *volinfo)
GF_ASSERT(volinfo->redundancy_count > 0);
break;
- case GF_CLUSTER_TYPE_TIER:
- if (volinfo->tier_info.cold_type == GF_CLUSTER_TYPE_DISPERSE)
- volinfo->tier_info
- .cold_dist_leaf_count = volinfo->disperse_count;
- else
- volinfo->tier_info
- .cold_dist_leaf_count = glusterd_calc_dist_leaf_count(
- volinfo->tier_info.cold_replica_count, 1);
-
+ case GF_CLUSTER_TYPE_STRIPE:
+ case GF_CLUSTER_TYPE_STRIPE_REPLICATE:
+ gf_msg(this->name, GF_LOG_CRITICAL, ENOTSUP,
+ GD_MSG_VOLINFO_STORE_FAIL,
+ "The volume type is no more supported. Please refer to "
+ "glusterfs-6.0 release-notes for how to migrate from "
+ "this volume type");
break;
default:
@@ -3244,7 +3270,7 @@ glusterd_store_update_volinfo(glusterd_volinfo_t *volinfo)
ret = 0;
out:
- if (gf_store_iter_destroy(iter)) {
+ if (gf_store_iter_destroy(&iter)) {
gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_STORE_ITER_DESTROY_FAIL,
"Failed to destroy store iter");
ret = -1;
@@ -3349,20 +3375,6 @@ glusterd_store_set_options_path(glusterd_conf_t *conf, char *path, size_t len)
snprintf(path, len, "%s/options", conf->workdir);
}
-int
-_store_global_opts(dict_t *this, char *key, data_t *value, void *data)
-{
- gf_store_handle_t *shandle = data;
-
- if (gf_store_save_value(shandle->fd, key, (char *)value->data)) {
- gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_STORE_HANDLE_WRITE_FAIL,
- "Unable to write into store handle for key : %s, value %s", key,
- (char *)value->data);
- }
-
- return 0;
-}
-
int32_t
glusterd_store_options(xlator_t *this, dict_t *opts)
{
@@ -3371,13 +3383,15 @@ glusterd_store_options(xlator_t *this, dict_t *opts)
char path[PATH_MAX] = {0};
int fd = -1;
int32_t ret = -1;
+ glusterd_volinfo_data_store_t *dict_data = NULL;
conf = this->private;
glusterd_store_set_options_path(conf, path, sizeof(path));
ret = gf_store_handle_new(path, &shandle);
- if (ret)
+ if (ret) {
goto out;
+ }
fd = gf_store_mkstemp(shandle);
if (fd <= 0) {
@@ -3385,15 +3399,30 @@ glusterd_store_options(xlator_t *this, dict_t *opts)
goto out;
}
+ dict_data = GF_CALLOC(1, sizeof(glusterd_volinfo_data_store_t),
+ gf_gld_mt_volinfo_dict_data_t);
+ if (dict_data == NULL) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_NO_MEMORY, NULL);
+ return -1;
+ }
+ dict_data->shandle = shandle;
shandle->fd = fd;
- dict_foreach(opts, _store_global_opts, shandle);
- shandle->fd = 0;
+ dict_foreach(opts, _storeopts, (void *)dict_data);
+ if (dict_data->buffer_len > 0) {
+ ret = gf_store_save_items(fd, dict_data->buffer);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_FILE_OP_FAILED, NULL);
+ goto out;
+ }
+ }
+
ret = gf_store_rename_tmppath(shandle);
- if (ret)
- goto out;
out:
- if ((ret < 0) && (fd > 0))
+ shandle->fd = 0;
+ GF_FREE(dict_data);
+ if ((ret < 0) && (fd > 0)) {
gf_store_unlink_tmppath(shandle);
+ }
gf_store_handle_destroy(shandle);
return ret;
}
@@ -3439,7 +3468,7 @@ glusterd_store_retrieve_options(xlator_t *this)
goto out;
ret = 0;
out:
- (void)gf_store_iter_destroy(iter);
+ (void)gf_store_iter_destroy(&iter);
gf_store_handle_destroy(shandle);
return ret;
}
@@ -3491,28 +3520,28 @@ glusterd_store_retrieve_volumes(xlator_t *this, glusterd_snap_t *snap)
goto out;
}
- GF_SKIP_IRRELEVANT_ENTRIES(entry, dir, scratch);
-
- while (entry) {
+ while ((entry = sys_readdir(dir, scratch))) {
+ if (gf_irrelevant_entry(entry))
+ continue;
if (snap && ((!strcmp(entry->d_name, "geo-replication")) ||
(!strcmp(entry->d_name, "info"))))
- goto next;
+ continue;
len = snprintf(entry_path, PATH_MAX, "%s/%s", path, entry->d_name);
- if ((len < 0) || (len >= PATH_MAX)) {
- goto next;
- }
+ if ((len < 0) || (len >= PATH_MAX))
+ continue;
+
ret = sys_lstat(entry_path, &st);
if (ret == -1) {
gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_INVALID_ENTRY,
"Failed to stat entry %s : %s", path, strerror(errno));
- goto next;
+ continue;
}
if (!S_ISDIR(st.st_mode)) {
gf_msg_debug(this->name, 0, "%s is not a valid volume",
entry->d_name);
- goto next;
+ continue;
}
volinfo = glusterd_store_retrieve_volume(entry->d_name, snap);
@@ -3535,8 +3564,6 @@ glusterd_store_retrieve_volumes(xlator_t *this, glusterd_snap_t *snap)
glusterd_store_create_nodestate_sh_on_absence(volinfo);
glusterd_store_perform_node_state_store(volinfo);
}
- next:
- GF_SKIP_IRRELEVANT_ENTRIES(entry, dir, scratch);
}
ret = 0;
@@ -3710,7 +3737,7 @@ glusterd_recreate_vol_brick_mounts(xlator_t *this, glusterd_volinfo_t *volinfo)
ret = sys_lstat(brickinfo->path, &st_buf);
if (ret) {
if (errno == ENOENT) {
- ret = mkdir_p(brick_mount_path, 0777, _gf_true);
+ ret = mkdir_p(brick_mount_path, 0755, _gf_true);
if (ret) {
gf_msg(this->name, GF_LOG_ERROR, errno,
GD_MSG_CREATE_DIR_FAILED, "Failed to create %s. ",
@@ -3891,7 +3918,7 @@ glusterd_store_update_snap(glusterd_snap_t *snap)
ret = 0;
out:
- if (gf_store_iter_destroy(iter)) {
+ if (gf_store_iter_destroy(&iter)) {
gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_STORE_ITER_DESTROY_FAIL,
"Failed to destroy store iter");
ret = -1;
@@ -3957,7 +3984,6 @@ out:
int32_t
glusterd_store_retrieve_missed_snaps_list(xlator_t *this)
{
- char buf[PATH_MAX] = "";
char path[PATH_MAX] = "";
char *snap_vol_id = NULL;
char *missed_node_info = NULL;
@@ -3994,8 +4020,8 @@ glusterd_store_retrieve_missed_snaps_list(xlator_t *this)
}
do {
- ret = gf_store_read_and_tokenize(
- fp, buf, sizeof(buf), &missed_node_info, &value, &store_errno);
+ ret = gf_store_read_and_tokenize(fp, &missed_node_info, &value,
+ &store_errno);
if (ret) {
if (store_errno == GD_STORE_EOF) {
gf_msg_debug(this->name, 0, "EOF for missed_snap_list");
@@ -4087,9 +4113,9 @@ glusterd_store_retrieve_snaps(xlator_t *this)
goto out;
}
- GF_SKIP_IRRELEVANT_ENTRIES(entry, dir, scratch);
-
- while (entry) {
+ while ((entry = sys_readdir(dir, scratch))) {
+ if (gf_irrelevant_entry(entry))
+ continue;
if (strcmp(entry->d_name, GLUSTERD_MISSED_SNAPS_LIST_FILE)) {
ret = glusterd_store_retrieve_snap(entry->d_name);
if (ret) {
@@ -4098,7 +4124,6 @@ glusterd_store_retrieve_snaps(xlator_t *this)
goto out;
}
}
- GF_SKIP_IRRELEVANT_ENTRIES(entry, dir, scratch);
}
/* Retrieve missed_snaps_list */
@@ -4120,8 +4145,8 @@ out:
int32_t
glusterd_store_write_missed_snapinfo(int32_t fd)
{
- char key[PATH_MAX] = "";
- char value[PATH_MAX] = "";
+ char key[(UUID_SIZE * 2) + 2];
+ char value[PATH_MAX];
int32_t ret = -1;
glusterd_conf_t *priv = NULL;
glusterd_missed_snap_info *missed_snapinfo = NULL;
@@ -4410,41 +4435,39 @@ glusterd_store_create_peer_shandle(glusterd_peerinfo_t *peerinfo)
return ret;
}
-int32_t
+static int32_t
glusterd_store_peer_write(int fd, glusterd_peerinfo_t *peerinfo)
{
- char buf[50] = {0};
+ char buf[PATH_MAX];
+ uint total_len = 0;
int32_t ret = 0;
int32_t i = 1;
glusterd_peer_hostname_t *hostname = NULL;
- char *key = NULL;
-
- ret = gf_store_save_value(fd, GLUSTERD_STORE_KEY_PEER_UUID,
- uuid_utoa(peerinfo->uuid));
- if (ret)
- goto out;
- snprintf(buf, sizeof(buf), "%d", peerinfo->state.state);
- ret = gf_store_save_value(fd, GLUSTERD_STORE_KEY_PEER_STATE, buf);
- if (ret)
+ ret = snprintf(buf + total_len, sizeof(buf) - total_len, "%s=%s\n%s=%d\n",
+ GLUSTERD_STORE_KEY_PEER_UUID, uuid_utoa(peerinfo->uuid),
+ GLUSTERD_STORE_KEY_PEER_STATE, peerinfo->state.state);
+ if (ret < 0 || ret >= sizeof(buf) - total_len) {
+ ret = -1;
goto out;
+ }
+ total_len += ret;
cds_list_for_each_entry(hostname, &peerinfo->hostnames, hostname_list)
{
- ret = gf_asprintf(&key, GLUSTERD_STORE_KEY_PEER_HOSTNAME "%d", i);
- if (ret < 0)
- goto out;
- ret = gf_store_save_value(fd, key, hostname->hostname);
- if (ret)
+ ret = snprintf(buf + total_len, sizeof(buf) - total_len,
+ GLUSTERD_STORE_KEY_PEER_HOSTNAME "%d=%s\n", i,
+ hostname->hostname);
+ if (ret < 0 || ret >= sizeof(buf) - total_len) {
+ ret = -1;
goto out;
- GF_FREE(key);
- key = NULL;
+ }
+ total_len += ret;
i++;
}
+ ret = gf_store_save_items(fd, buf);
out:
- if (key)
- GF_FREE(key);
gf_msg_debug("glusterd", 0, "Returning with %d", ret);
return ret;
}
@@ -4547,11 +4570,9 @@ glusterd_store_retrieve_peers(xlator_t *this)
goto out;
}
- for (;;) {
- GF_SKIP_IRRELEVANT_ENTRIES(entry, dir, scratch);
- if (!entry) {
- break;
- }
+ while ((entry = sys_readdir(dir, scratch))) {
+ if (gf_irrelevant_entry(entry))
+ continue;
if (gf_uuid_parse(entry->d_name, tmp_uuid) != 0) {
gf_log(this->name, GF_LOG_WARNING, "skipping non-peer file %s",
entry->d_name);
@@ -4629,10 +4650,6 @@ glusterd_store_retrieve_peers(xlator_t *this)
*/
address = cds_list_entry(peerinfo->hostnames.next,
glusterd_peer_hostname_t, hostname_list);
- if (!address) {
- ret = -1;
- goto next;
- }
peerinfo->hostname = gf_strdup(address->hostname);
ret = glusterd_friend_add_from_peerinfo(peerinfo, 1, NULL);
@@ -4643,7 +4660,7 @@ glusterd_store_retrieve_peers(xlator_t *this)
is_ok = _gf_true;
next:
- (void)gf_store_iter_destroy(iter);
+ (void)gf_store_iter_destroy(&iter);
if (!is_ok) {
gf_log(this->name, GF_LOG_WARNING,
@@ -4657,14 +4674,14 @@ glusterd_store_retrieve_peers(xlator_t *this)
args.mode = GD_MODE_ON;
- rcu_read_lock();
+ RCU_READ_LOCK;
cds_list_for_each_entry_rcu(peerinfo, &priv->peers, uuid_list)
{
ret = glusterd_friend_rpc_create(this, peerinfo, &args);
if (ret)
break;
}
- rcu_read_unlock();
+ RCU_READ_UNLOCK;
peerinfo = NULL;
out:
@@ -4826,7 +4843,9 @@ glusterd_resolve_all_bricks(xlator_t *this)
"peer=%s;volume=%s;brick=%s", brickinfo->hostname,
volinfo->volname, brickinfo->path);
gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_RESOLVE_BRICK_FAIL,
- "resolve brick failed in restore");
+ "Failed to resolve brick %s with host %s of volume %s"
+ " in restore",
+ brickinfo->path, brickinfo->hostname, volinfo->volname);
goto out;
}
}
@@ -4975,10 +4994,10 @@ glusterd_store_save_quota_version_and_cksum(glusterd_volinfo_t *volinfo)
glusterd_conf_t *conf = NULL;
xlator_t *this = NULL;
char path[PATH_MAX] = {0};
- char cksum_path[PATH_MAX] = {
+ char cksum_path[PATH_MAX + 32] = {
0,
};
- char buf[256] = {0};
+ char buf[64] = {0};
int fd = -1;
int32_t ret = -1;
int32_t len = 0;
@@ -5003,19 +5022,17 @@ glusterd_store_save_quota_version_and_cksum(glusterd_volinfo_t *volinfo)
goto out;
}
- snprintf(buf, sizeof(buf) - 1, "%u", volinfo->quota_conf_cksum);
- ret = gf_store_save_value(fd, "cksum", buf);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_CKSUM_STORE_FAIL,
- "Failed to store cksum");
+ ret = snprintf(buf, sizeof(buf), "cksum=%u\nversion=%u\n",
+ volinfo->quota_conf_cksum, volinfo->quota_conf_version);
+ if (ret < 0 || ret >= sizeof(buf)) {
+ ret = -1;
goto out;
}
- snprintf(buf, sizeof(buf) - 1, "%u", volinfo->quota_conf_version);
- ret = gf_store_save_value(fd, "version", buf);
+ ret = gf_store_save_items(fd, buf);
if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VERS_STORE_FAIL,
- "Failed to store version");
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_CKSUM_STORE_FAIL,
+ "Failed to store quota cksum and version");
goto out;
}
diff --git a/xlators/mgmt/glusterd/src/glusterd-store.h b/xlators/mgmt/glusterd/src/glusterd-store.h
index 5db77703482..83f4df0783e 100644
--- a/xlators/mgmt/glusterd/src/glusterd-store.h
+++ b/xlators/mgmt/glusterd/src/glusterd-store.h
@@ -11,14 +11,14 @@
#define _GLUSTERD_HA_H_
#include <pthread.h>
-#include "compat-uuid.h"
-
-#include "glusterfs.h"
-#include "xlator.h"
-#include "run.h"
-#include "logging.h"
-#include "call-stub.h"
-#include "byte-order.h"
+#include <glusterfs/compat-uuid.h>
+
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/xlator.h>
+#include <glusterfs/run.h>
+#include <glusterfs/logging.h>
+#include <glusterfs/call-stub.h>
+#include <glusterfs/byte-order.h>
#include "glusterd.h"
#include "rpcsvc.h"
@@ -28,6 +28,8 @@ typedef enum glusterd_store_ver_ac_ {
GLUSTERD_VOLINFO_VER_AC_DECREMENT = 2,
} glusterd_volinfo_ver_ac_t;
+#define UUID_SIZE 36
+#define VOLINFO_BUFFER_SIZE 4093
#define GLUSTERD_STORE_UUID_KEY "UUID"
#define GLUSTERD_STORE_KEY_VOL_TYPE "type"
@@ -40,7 +42,9 @@ typedef enum glusterd_store_ver_ac_ {
#define GLUSTERD_STORE_KEY_VOL_DISPERSE_CNT "disperse_count"
#define GLUSTERD_STORE_KEY_VOL_REDUNDANCY_CNT "redundancy_count"
#define GLUSTERD_STORE_KEY_VOL_ARBITER_CNT "arbiter_count"
+#define GLUSTERD_STORE_KEY_VOL_THIN_ARBITER_CNT "thin_arbiter_count"
#define GLUSTERD_STORE_KEY_VOL_BRICK "brick"
+#define GLUSTERD_STORE_KEY_VOL_TA_BRICK "ta-brick"
#define GLUSTERD_STORE_KEY_VOL_VERSION "version"
#define GLUSTERD_STORE_KEY_VOL_TRANSPORT "transport-type"
#define GLUSTERD_STORE_KEY_VOL_ID "volume-id"
@@ -59,17 +63,6 @@ typedef enum glusterd_store_ver_ac_ {
#define GLUSTERD_STORE_KEY_VOL_CLIENT_OP_VERSION "client-op-version"
#define GLUSTERD_STORE_KEY_VOL_QUOTA_VERSION "quota-version"
-#define GLUSTERD_STORE_KEY_VOL_TIER_STATUS "tier_status"
-#define GLUSTERD_STORE_KEY_TIER_DETACH_OP "tier_op"
-#define GLUSTERD_STORE_KEY_COLD_TYPE "cold_type"
-#define GLUSTERD_STORE_KEY_COLD_COUNT "cold_count"
-#define GLUSTERD_STORE_KEY_COLD_REPLICA_COUNT "cold_replica_count"
-#define GLUSTERD_STORE_KEY_COLD_DISPERSE_COUNT "cold_disperse_count"
-#define GLUSTERD_STORE_KEY_COLD_REDUNDANCY_COUNT "cold_redundancy_count"
-#define GLUSTERD_STORE_KEY_HOT_TYPE "hot_type"
-#define GLUSTERD_STORE_KEY_HOT_COUNT "hot_count"
-#define GLUSTERD_STORE_KEY_HOT_REPLICA_COUNT "hot_replica_count"
-
#define GLUSTERD_STORE_KEY_SNAP_NAME "name"
#define GLUSTERD_STORE_KEY_SNAP_ID "snap-id"
#define GLUSTERD_STORE_KEY_SNAP_DESC "desc"
@@ -101,8 +94,7 @@ typedef enum glusterd_store_ver_ac_ {
#define GLUSTERD_STORE_KEY_PEER_UUID "uuid"
#define GLUSTERD_STORE_KEY_PEER_HOSTNAME "hostname"
#define GLUSTERD_STORE_KEY_PEER_STATE "state"
-
-#define GLUSTERD_STORE_KEY_VOL_CAPS "caps"
+#define GLUSTERD_STORE_KEY_VOL_CAPS "caps" /* left just for backward compat */
#define GLUSTERD_STORE_KEY_VOL_DEFRAG_REB_FILES "rebalanced-files"
#define GLUSTERD_STORE_KEY_VOL_DEFRAG_SIZE "size"
@@ -118,6 +110,21 @@ typedef enum glusterd_store_ver_ac_ {
#define GLUSTERD_STORE_KEY_VOL_MIGRATIONS_SKIPPED "migration-skipped"
#define GLUSTERD_STORE_KEY_VOL_MIGRATION_RUN_TIME "migration-run-time"
+#define GLUSTERD_STORE_KEY_GANESHA_GLOBAL "nfs-ganesha"
+
+/*
+ * The structure is responsible for handling the parameter for writes into
+ * the buffer before it is finally written to the file. The writes will be
+ * of the form of key-value pairs.
+ */
+struct glusterd_volinfo_data_store_ {
+ gf_store_handle_t *shandle; /*Contains fd and path of the file */
+ int16_t buffer_len;
+ char key_check; /* flag to check if key is to be validated before write*/
+ char buffer[VOLINFO_BUFFER_SIZE];
+};
+typedef struct glusterd_volinfo_data_store_ glusterd_volinfo_data_store_t;
+
int32_t
glusterd_store_volinfo(glusterd_volinfo_t *volinfo,
glusterd_volinfo_ver_ac_t ac);
@@ -156,6 +163,12 @@ int
glusterd_retrieve_op_version(xlator_t *this, int *op_version);
int
+glusterd_retrieve_max_op_version(xlator_t *this, int *op_version);
+
+int
+glusterd_store_max_op_version(xlator_t *this);
+
+int
glusterd_store_global_info(xlator_t *this);
int32_t
@@ -171,9 +184,6 @@ void
glusterd_replace_slash_with_hyphen(char *str);
int32_t
-glusterd_store_perform_volume_store(glusterd_volinfo_t *volinfo);
-
-int32_t
glusterd_store_create_quota_conf_sh_on_absence(glusterd_volinfo_t *volinfo);
int
diff --git a/xlators/mgmt/glusterd/src/glusterd-svc-helper.c b/xlators/mgmt/glusterd/src/glusterd-svc-helper.c
index 3007d92f539..ca845903c4f 100644
--- a/xlators/mgmt/glusterd/src/glusterd-svc-helper.c
+++ b/xlators/mgmt/glusterd/src/glusterd-svc-helper.c
@@ -7,25 +7,28 @@
later), or the GNU General Public License, version 2 (GPLv2), in all
cases as published by the Free Software Foundation.
*/
+#include <signal.h>
-#include "globals.h"
-#include "run.h"
+#include <glusterfs/globals.h>
+#include <glusterfs/run.h>
#include "glusterd.h"
-#include "glusterfs.h"
+#include <glusterfs/glusterfs.h>
#include "glusterd-utils.h"
#include "glusterd-svc-mgmt.h"
#include "glusterd-shd-svc.h"
#include "glusterd-quotad-svc.h"
+#ifdef BUILD_GNFS
#include "glusterd-nfs-svc.h"
+#endif
#include "glusterd-bitd-svc.h"
-#include "glusterd-tierd-svc.h"
-#include "glusterd-tierd-svc-helper.h"
+#include "glusterd-shd-svc-helper.h"
#include "glusterd-scrub-svc.h"
#include "glusterd-svc-helper.h"
-#include "syscall.h"
+#include <glusterfs/syscall.h>
+#include "glusterd-snapshot-utils.h"
int
-glusterd_svcs_reconfigure()
+glusterd_svcs_reconfigure(glusterd_volinfo_t *volinfo)
{
int ret = 0;
xlator_t *this = THIS;
@@ -37,15 +40,18 @@ glusterd_svcs_reconfigure()
conf = this->private;
GF_ASSERT(conf);
+#ifdef BUILD_GNFS
svc_name = "nfs";
ret = glusterd_nfssvc_reconfigure();
if (ret)
goto out;
-
+#endif
svc_name = "self-heald";
- ret = glusterd_shdsvc_reconfigure();
- if (ret)
- goto out;
+ if (volinfo) {
+ ret = glusterd_shdsvc_reconfigure(volinfo);
+ if (ret)
+ goto out;
+ }
if (conf->op_version == GD_OP_VERSION_MIN)
goto out;
@@ -69,7 +75,7 @@ out:
}
int
-glusterd_svcs_stop()
+glusterd_svcs_stop(glusterd_volinfo_t *volinfo)
{
int ret = 0;
xlator_t *this = NULL;
@@ -81,23 +87,27 @@ glusterd_svcs_stop()
priv = this->private;
GF_ASSERT(priv);
- ret = glusterd_svc_stop(&(priv->nfs_svc), SIGKILL);
+#ifdef BUILD_GNFS
+ ret = priv->nfs_svc.stop(&(priv->nfs_svc), SIGKILL);
if (ret)
goto out;
-
- ret = glusterd_svc_stop(&(priv->shd_svc), SIGTERM);
+#endif
+ ret = priv->quotad_svc.stop(&(priv->quotad_svc), SIGTERM);
if (ret)
goto out;
- ret = glusterd_svc_stop(&(priv->quotad_svc), SIGTERM);
- if (ret)
- goto out;
+ if (volinfo) {
+ ret = volinfo->shd.svc.stop(&(volinfo->shd.svc), SIGTERM);
+ if (ret)
+ goto out;
+ }
- ret = glusterd_svc_stop(&(priv->bitd_svc), SIGTERM);
+ ret = priv->bitd_svc.stop(&(priv->bitd_svc), SIGTERM);
if (ret)
goto out;
- ret = glusterd_svc_stop(&(priv->scrub_svc), SIGTERM);
+ ret = priv->scrub_svc.stop(&(priv->scrub_svc), SIGTERM);
+
out:
return ret;
}
@@ -117,16 +127,11 @@ glusterd_svcs_manager(glusterd_volinfo_t *volinfo)
if (volinfo && volinfo->is_snap_volume)
return 0;
+#if BUILD_GNFS
ret = conf->nfs_svc.manager(&(conf->nfs_svc), NULL, PROC_START_NO_WAIT);
if (ret)
goto out;
-
- ret = conf->shd_svc.manager(&(conf->shd_svc), volinfo, PROC_START_NO_WAIT);
- if (ret == -EINVAL)
- ret = 0;
- if (ret)
- goto out;
-
+#endif
if (conf->op_version == GD_OP_VERSION_MIN)
goto out;
@@ -143,6 +148,15 @@ glusterd_svcs_manager(glusterd_volinfo_t *volinfo)
if (ret)
goto out;
+ if (volinfo) {
+ ret = volinfo->shd.svc.manager(&(volinfo->shd.svc), volinfo,
+ PROC_START_NO_WAIT);
+ if (ret == -EINVAL)
+ ret = 0;
+ if (ret)
+ goto out;
+ }
+
ret = conf->scrub_svc.manager(&(conf->scrub_svc), NULL, PROC_START_NO_WAIT);
if (ret == -EINVAL)
ret = 0;
@@ -179,7 +193,7 @@ glusterd_svc_check_volfile_identical(char *svc_name,
goto out;
}
- /* coverity[secure_temp] mkstemp uses 0600 as the mode and is safe */
+ /* coverity[SECURE_TEMP] mkstemp uses 0600 as the mode and is safe */
tmp_fd = mkstemp(tmpvol);
if (tmp_fd < 0) {
gf_msg(this->name, GF_LOG_WARNING, errno, GD_MSG_FILE_OP_FAILED,
@@ -225,8 +239,10 @@ glusterd_svc_check_topology_identical(char *svc_name,
int tmpclean = 0;
int tmpfd = -1;
- if ((!identical) || (!this) || (!this->private))
+ if ((!identical) || (!this) || (!this->private)) {
+ gf_smsg(THIS->name, GF_LOG_ERROR, errno, GD_MSG_INVALID_ARGUMENT, NULL);
goto out;
+ }
conf = this->private;
GF_VALIDATE_OR_GOTO(this->name, conf, out);
@@ -241,7 +257,7 @@ glusterd_svc_check_topology_identical(char *svc_name,
goto out;
}
- /* coverity[secure_temp] mkstemp uses 0600 as the mode and is safe */
+ /* coverity[SECURE_TEMP] mkstemp uses 0600 as the mode and is safe */
tmpfd = mkstemp(tmpvol);
if (tmpfd < 0) {
gf_msg(this->name, GF_LOG_WARNING, errno, GD_MSG_FILE_OP_FAILED,
@@ -269,3 +285,763 @@ out:
GF_FREE(tmpvol);
return ret;
}
+
+int
+glusterd_volume_svc_check_volfile_identical(
+ char *svc_name, dict_t *mode_dict, glusterd_volinfo_t *volinfo,
+ glusterd_vol_graph_builder_t builder, gf_boolean_t *identical)
+{
+ char orgvol[PATH_MAX] = {
+ 0,
+ };
+ char *tmpvol = NULL;
+ xlator_t *this = NULL;
+ int ret = -1;
+ int need_unlink = 0;
+ int tmp_fd = -1;
+
+ this = THIS;
+
+ GF_VALIDATE_OR_GOTO("glusterd", this, out);
+ GF_VALIDATE_OR_GOTO(this->name, identical, out);
+
+ /* This builds volfile for volume level daemons */
+ glusterd_volume_svc_build_volfile_path(svc_name, volinfo, orgvol,
+ sizeof(orgvol));
+
+ ret = gf_asprintf(&tmpvol, "/tmp/g%s-XXXXXX", svc_name);
+ if (ret < 0) {
+ goto out;
+ }
+
+ /* coverity[SECURE_TEMP] mkstemp uses 0600 as the mode and is safe */
+ tmp_fd = mkstemp(tmpvol);
+ if (tmp_fd < 0) {
+ gf_msg(this->name, GF_LOG_WARNING, errno, GD_MSG_FILE_OP_FAILED,
+ "Unable to create temp file"
+ " %s:(%s)",
+ tmpvol, strerror(errno));
+ ret = -1;
+ goto out;
+ }
+
+ need_unlink = 1;
+
+ ret = builder(volinfo, tmpvol, mode_dict);
+ if (ret)
+ goto out;
+
+ ret = glusterd_check_files_identical(orgvol, tmpvol, identical);
+out:
+ if (need_unlink)
+ sys_unlink(tmpvol);
+
+ if (tmpvol != NULL)
+ GF_FREE(tmpvol);
+
+ if (tmp_fd >= 0)
+ sys_close(tmp_fd);
+
+ return ret;
+}
+
+int
+glusterd_volume_svc_check_topology_identical(
+ char *svc_name, dict_t *mode_dict, glusterd_volinfo_t *volinfo,
+ glusterd_vol_graph_builder_t builder, gf_boolean_t *identical)
+{
+ char orgvol[PATH_MAX] = {
+ 0,
+ };
+ char *tmpvol = NULL;
+ glusterd_conf_t *conf = NULL;
+ xlator_t *this = THIS;
+ int ret = -1;
+ int tmpclean = 0;
+ int tmpfd = -1;
+
+ if ((!identical) || (!this) || (!this->private)) {
+ gf_smsg(THIS->name, GF_LOG_ERROR, errno, GD_MSG_INVALID_ARGUMENT, NULL);
+ goto out;
+ }
+
+ conf = this->private;
+ GF_VALIDATE_OR_GOTO(this->name, conf, out);
+
+ /* This builds volfile for volume level daemons */
+ glusterd_volume_svc_build_volfile_path(svc_name, volinfo, orgvol,
+ sizeof(orgvol));
+ /* Create the temporary volfile */
+ ret = gf_asprintf(&tmpvol, "/tmp/g%s-XXXXXX", svc_name);
+ if (ret < 0) {
+ goto out;
+ }
+
+ /* coverity[SECURE_TEMP] mkstemp uses 0600 as the mode and is safe */
+ tmpfd = mkstemp(tmpvol);
+ if (tmpfd < 0) {
+ gf_msg(this->name, GF_LOG_WARNING, errno, GD_MSG_FILE_OP_FAILED,
+ "Unable to create temp file"
+ " %s:(%s)",
+ tmpvol, strerror(errno));
+ ret = -1;
+ goto out;
+ }
+
+ tmpclean = 1; /* SET the flag to unlink() tmpfile */
+
+ ret = builder(volinfo, tmpvol, mode_dict);
+ if (ret)
+ goto out;
+
+ /* Compare the topology of volfiles */
+ ret = glusterd_check_topology_identical(orgvol, tmpvol, identical);
+out:
+ if (tmpfd >= 0)
+ sys_close(tmpfd);
+ if (tmpclean)
+ sys_unlink(tmpvol);
+ if (tmpvol != NULL)
+ GF_FREE(tmpvol);
+ return ret;
+}
+
+gf_boolean_t
+glusterd_is_svcproc_attachable(glusterd_svc_proc_t *svc_proc)
+{
+ int pid = -1;
+ glusterd_svc_t *parent_svc = NULL;
+
+ if (!svc_proc)
+ return _gf_false;
+
+ if (svc_proc->status == GF_SVC_STARTING)
+ return _gf_true;
+
+ if (svc_proc->status == GF_SVC_STARTED ||
+ svc_proc->status == GF_SVC_DISCONNECTED) {
+ parent_svc = cds_list_entry(svc_proc->svcs.next, glusterd_svc_t,
+ mux_svc);
+ if (parent_svc && gf_is_service_running(parent_svc->proc.pidfile, &pid))
+ return _gf_true;
+ }
+
+ if (svc_proc->status == GF_SVC_DIED || svc_proc->status == GF_SVC_STOPPING)
+ return _gf_false;
+
+ return _gf_false;
+}
+
+void *
+__gf_find_compatible_svc(gd_node_type daemon)
+{
+ glusterd_svc_proc_t *svc_proc = NULL;
+ struct cds_list_head *svc_procs = NULL;
+ glusterd_conf_t *conf = NULL;
+
+ conf = THIS->private;
+ GF_VALIDATE_OR_GOTO("glusterd", conf, out);
+
+ switch (daemon) {
+ case GD_NODE_SHD: {
+ svc_procs = &conf->shd_procs;
+ if (!svc_procs)
+ goto out;
+ } break;
+ default:
+ /* Add support for other client daemons here */
+ goto out;
+ }
+
+ cds_list_for_each_entry(svc_proc, svc_procs, svc_proc_list)
+ {
+ if (glusterd_is_svcproc_attachable(svc_proc))
+ return (void *)svc_proc;
+ /*
+ * Logic to select one process goes here. Currently there is only one
+ * shd_proc. So selecting the first one;
+ */
+ }
+out:
+ return NULL;
+}
+
+glusterd_svc_proc_t *
+glusterd_svcprocess_new()
+{
+ glusterd_svc_proc_t *new_svcprocess = NULL;
+
+ new_svcprocess = GF_CALLOC(1, sizeof(*new_svcprocess),
+ gf_gld_mt_glusterd_svc_proc_t);
+
+ if (!new_svcprocess)
+ return NULL;
+
+ CDS_INIT_LIST_HEAD(&new_svcprocess->svc_proc_list);
+ CDS_INIT_LIST_HEAD(&new_svcprocess->svcs);
+ new_svcprocess->notify = glusterd_muxsvc_common_rpc_notify;
+ new_svcprocess->status = GF_SVC_STARTING;
+ return new_svcprocess;
+}
+
+int
+glusterd_shd_svc_mux_init(glusterd_volinfo_t *volinfo, glusterd_svc_t *svc)
+{
+ int ret = -1;
+ glusterd_svc_proc_t *mux_proc = NULL;
+ glusterd_conn_t *mux_conn = NULL;
+ glusterd_conf_t *conf = NULL;
+ glusterd_svc_t *parent_svc = NULL;
+ int pid = -1;
+ gf_boolean_t stop_daemon = _gf_false;
+ char pidfile[PATH_MAX] = {
+ 0,
+ };
+
+ GF_VALIDATE_OR_GOTO("glusterd", svc, out);
+ GF_VALIDATE_OR_GOTO("glusterd", volinfo, out);
+ conf = THIS->private;
+ GF_VALIDATE_OR_GOTO("glusterd", conf, out);
+ GF_VALIDATE_OR_GOTO("glusterd", svc, out);
+
+ pthread_mutex_lock(&conf->attach_lock);
+ {
+ if (svc->inited && !glusterd_proc_is_running(&(svc->proc))) {
+ /* This is the case when shd process was abnormally killed */
+ pthread_mutex_unlock(&conf->attach_lock);
+ glusterd_shd_svcproc_cleanup(&volinfo->shd);
+ pthread_mutex_lock(&conf->attach_lock);
+ }
+
+ if (!svc->inited) {
+ glusterd_svc_build_shd_pidfile(volinfo, pidfile, sizeof(pidfile));
+ ret = snprintf(svc->proc.name, sizeof(svc->proc.name), "%s",
+ "glustershd");
+ if (ret < 0)
+ goto unlock;
+
+ ret = snprintf(svc->proc.pidfile, sizeof(svc->proc.pidfile), "%s",
+ pidfile);
+ if (ret < 0)
+ goto unlock;
+
+ if (gf_is_service_running(pidfile, &pid)) {
+ /* Just connect is required, but we don't know what happens
+ * during the disconnect. So better to reattach.
+ */
+ mux_proc = __gf_find_compatible_svc_from_pid(GD_NODE_SHD, pid);
+ }
+
+ if (!mux_proc) {
+ if (pid != -1 && sys_access(pidfile, R_OK) == 0) {
+ /* stale pid file, stop and unlink it. This has to be
+ * done outside the attach_lock.
+ */
+ stop_daemon = _gf_true;
+ }
+ mux_proc = __gf_find_compatible_svc(GD_NODE_SHD);
+ }
+ if (mux_proc) {
+ /* Take first entry from the process */
+ parent_svc = cds_list_entry(mux_proc->svcs.next, glusterd_svc_t,
+ mux_svc);
+ mux_conn = &parent_svc->conn;
+ if (volinfo)
+ volinfo->shd.attached = _gf_true;
+ } else {
+ mux_proc = glusterd_svcprocess_new();
+ if (!mux_proc) {
+ ret = -1;
+ goto unlock;
+ }
+ cds_list_add_tail(&mux_proc->svc_proc_list, &conf->shd_procs);
+ }
+ svc->svc_proc = mux_proc;
+ cds_list_del_init(&svc->mux_svc);
+ cds_list_add_tail(&svc->mux_svc, &mux_proc->svcs);
+ ret = glusterd_shdsvc_init(volinfo, mux_conn, mux_proc);
+ if (ret) {
+ pthread_mutex_unlock(&conf->attach_lock);
+ gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_FAILED_INIT_SHDSVC,
+ "Failed to init shd "
+ "service");
+ goto out;
+ }
+ gf_msg_debug(THIS->name, 0, "shd service initialized");
+ svc->inited = _gf_true;
+ }
+ ret = 0;
+ }
+unlock:
+ pthread_mutex_unlock(&conf->attach_lock);
+out:
+ if (stop_daemon) {
+ glusterd_proc_stop(&svc->proc, SIGTERM, PROC_STOP_FORCE);
+ glusterd_unlink_file(pidfile);
+ }
+ return ret;
+}
+
+void *
+__gf_find_compatible_svc_from_pid(gd_node_type daemon, pid_t pid)
+{
+ glusterd_svc_proc_t *svc_proc = NULL;
+ struct cds_list_head *svc_procs = NULL;
+ glusterd_svc_t *svc = NULL;
+ pid_t mux_pid = -1;
+ glusterd_conf_t *conf = NULL;
+
+ conf = THIS->private;
+ if (!conf)
+ return NULL;
+
+ switch (daemon) {
+ case GD_NODE_SHD: {
+ svc_procs = &conf->shd_procs;
+ if (!svc_procs)
+ return NULL;
+ } break;
+ default:
+ /* Add support for other client daemons here */
+ return NULL;
+ }
+
+ cds_list_for_each_entry(svc_proc, svc_procs, svc_proc_list)
+ {
+ cds_list_for_each_entry(svc, &svc_proc->svcs, mux_svc)
+ {
+ if (gf_is_service_running(svc->proc.pidfile, &mux_pid)) {
+ if (mux_pid == pid &&
+ glusterd_is_svcproc_attachable(svc_proc)) {
+ /*TODO
+ * inefficient loop, but at the moment, there is only
+ * one shd.
+ */
+ return svc_proc;
+ }
+ }
+ }
+ }
+ return NULL;
+}
+
+static int32_t
+my_callback(struct rpc_req *req, struct iovec *iov, int count, void *v_frame)
+{
+ call_frame_t *frame = v_frame;
+ xlator_t *this = NULL;
+ glusterd_conf_t *conf = NULL;
+
+ GF_VALIDATE_OR_GOTO("glusterd", frame, out);
+ this = frame->this;
+ GF_VALIDATE_OR_GOTO("glusterd", this, out);
+ conf = this->private;
+ GF_VALIDATE_OR_GOTO(this->name, conf, out);
+
+ if (GF_ATOMIC_DEC(conf->blockers) == 0) {
+ synccond_broadcast(&conf->cond_blockers);
+ }
+
+ STACK_DESTROY(frame->root);
+out:
+ return 0;
+}
+
+static int32_t
+glusterd_svc_attach_cbk(struct rpc_req *req, struct iovec *iov, int count,
+ void *v_frame)
+{
+ call_frame_t *frame = v_frame;
+ glusterd_volinfo_t *volinfo = NULL;
+ glusterd_shdsvc_t *shd = NULL;
+ glusterd_svc_t *svc = frame->cookie;
+ glusterd_conf_t *conf = NULL;
+ int *flag = (int *)frame->local;
+ xlator_t *this = THIS;
+ int ret = -1;
+ gf_getspec_rsp rsp = {
+ 0,
+ };
+
+ GF_VALIDATE_OR_GOTO("glusterd", this, out);
+ conf = this->private;
+ GF_VALIDATE_OR_GOTO("glusterd", conf, out);
+ GF_VALIDATE_OR_GOTO("glusterd", frame, out);
+ GF_VALIDATE_OR_GOTO("glusterd", svc, out);
+
+ frame->local = NULL;
+ frame->cookie = NULL;
+
+ if (!strcmp(svc->name, "glustershd")) {
+ /* Get volinfo->shd from svc object */
+ shd = cds_list_entry(svc, glusterd_shdsvc_t, svc);
+ if (!shd) {
+ gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_SHD_OBJ_GET_FAIL,
+ "Failed to get shd object "
+ "from shd service");
+ goto out;
+ }
+
+ /* Get volinfo from shd */
+ volinfo = cds_list_entry(shd, glusterd_volinfo_t, shd);
+ if (!volinfo) {
+ gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_VOLINFO_GET_FAIL,
+ "Failed to get volinfo from "
+ "from shd");
+ goto out;
+ }
+ }
+
+ if (!iov) {
+ gf_msg(frame->this->name, GF_LOG_ERROR, 0, GD_MSG_REQ_DECODE_FAIL,
+ "iov is NULL");
+ ret = -1;
+ goto out;
+ }
+
+ ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gf_getspec_rsp);
+ if (ret < 0) {
+ gf_msg(frame->this->name, GF_LOG_ERROR, 0, GD_MSG_REQ_DECODE_FAIL,
+ "XDR decoding error");
+ ret = -1;
+ goto out;
+ }
+
+ if (rsp.op_ret == 0) {
+ svc->online = _gf_true;
+ gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_SVC_ATTACH_FAIL,
+ "svc %s of volume %s attached successfully to pid %d", svc->name,
+ volinfo->volname, glusterd_proc_get_pid(&svc->proc));
+ } else {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SVC_ATTACH_FAIL,
+ "svc %s of volume %s failed to attach to pid %d", svc->name,
+ volinfo->volname, glusterd_proc_get_pid(&svc->proc));
+ if (!strcmp(svc->name, "glustershd")) {
+ glusterd_shd_svcproc_cleanup(&volinfo->shd);
+ }
+ }
+out:
+ if (flag) {
+ GF_FREE(flag);
+ }
+
+ if (volinfo)
+ glusterd_volinfo_unref(volinfo);
+
+ if (GF_ATOMIC_DEC(conf->blockers) == 0) {
+ synccond_broadcast(&conf->cond_blockers);
+ }
+ STACK_DESTROY(frame->root);
+ return 0;
+}
+
+extern size_t
+build_volfile_path(char *volume_id, char *path, size_t path_len,
+ char *trusted_str, dict_t *dict);
+
+int
+__glusterd_send_svc_configure_req(glusterd_svc_t *svc, int flags,
+ struct rpc_clnt *rpc, char *volfile_id,
+ int op)
+{
+ int ret = -1;
+ struct iobuf *iobuf = NULL;
+ struct iobref *iobref = NULL;
+ struct iovec iov = {
+ 0,
+ };
+ char path[PATH_MAX] = {
+ '\0',
+ };
+ struct stat stbuf = {
+ 0,
+ };
+ int32_t spec_fd = -1;
+ size_t file_len = -1;
+ char *volfile_content = NULL;
+ ssize_t req_size = 0;
+ call_frame_t *frame = NULL;
+ gd1_mgmt_brick_op_req brick_req;
+ dict_t *dict = NULL;
+ void *req = &brick_req;
+ void *errlbl = &&err;
+ struct rpc_clnt_connection *conn;
+ xlator_t *this = THIS;
+ glusterd_conf_t *conf = THIS->private;
+ extern struct rpc_clnt_program gd_brick_prog;
+ fop_cbk_fn_t cbkfn = my_callback;
+
+ if (!rpc) {
+ gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_PARAM_NULL,
+ "called with null rpc");
+ return -1;
+ }
+
+ conn = &rpc->conn;
+ if (!conn->connected || conn->disconnected) {
+ gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_CONNECT_RETURNED,
+ "not connected yet");
+ return -1;
+ }
+
+ brick_req.op = op;
+ brick_req.name = volfile_id;
+ brick_req.input.input_val = NULL;
+ brick_req.input.input_len = 0;
+ brick_req.dict.dict_val = NULL;
+ brick_req.dict.dict_len = 0;
+
+ frame = create_frame(this, this->ctx->pool);
+ if (!frame) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_FRAME_CREATE_FAIL,
+ NULL);
+ goto *errlbl;
+ }
+
+ if (op == GLUSTERD_SVC_ATTACH) {
+ dict = dict_new();
+ if (!dict) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL,
+ NULL);
+ ret = -ENOMEM;
+ goto *errlbl;
+ }
+
+ (void)build_volfile_path(volfile_id, path, sizeof(path), NULL, dict);
+
+ ret = sys_stat(path, &stbuf);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SVC_ATTACH_FAIL,
+ "Unable to stat %s (%s)", path, strerror(errno));
+ ret = -EINVAL;
+ goto *errlbl;
+ }
+
+ file_len = stbuf.st_size;
+ volfile_content = GF_MALLOC(file_len + 1, gf_common_mt_char);
+ if (!volfile_content) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_NO_MEMORY, NULL);
+ ret = -ENOMEM;
+ goto *errlbl;
+ }
+ spec_fd = open(path, O_RDONLY);
+ if (spec_fd < 0) {
+ gf_msg(THIS->name, GF_LOG_WARNING, 0, GD_MSG_SVC_ATTACH_FAIL,
+ "failed to read volfile %s", path);
+ ret = -EIO;
+ goto *errlbl;
+ }
+ ret = sys_read(spec_fd, volfile_content, file_len);
+ if (ret == file_len) {
+ brick_req.input.input_val = volfile_content;
+ brick_req.input.input_len = file_len;
+ } else {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SVC_ATTACH_FAIL,
+ "read failed on path %s. File size=%" GF_PRI_SIZET
+ "read size=%d",
+ path, file_len, ret);
+ ret = -EIO;
+ goto *errlbl;
+ }
+ if (dict->count > 0) {
+ ret = dict_allocate_and_serialize(dict, &brick_req.dict.dict_val,
+ &brick_req.dict.dict_len);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ GD_MSG_DICT_ALLOC_AND_SERL_LENGTH_GET_FAIL, NULL);
+ goto *errlbl;
+ }
+ }
+
+ frame->cookie = svc;
+ frame->local = GF_CALLOC(1, sizeof(int), gf_gld_mt_int);
+ *((int *)frame->local) = flags;
+ cbkfn = glusterd_svc_attach_cbk;
+ }
+
+ req_size = xdr_sizeof((xdrproc_t)xdr_gd1_mgmt_brick_op_req, req);
+ iobuf = iobuf_get2(rpc->ctx->iobuf_pool, req_size);
+ if (!iobuf) {
+ goto *errlbl;
+ }
+ errlbl = &&maybe_free_iobuf;
+
+ iov.iov_base = iobuf->ptr;
+ iov.iov_len = iobuf_pagesize(iobuf);
+
+ iobref = iobref_new();
+ if (!iobref) {
+ goto *errlbl;
+ }
+ errlbl = &&free_iobref;
+
+ iobref_add(iobref, iobuf);
+ /*
+ * Drop our reference to the iobuf. The iobref should already have
+ * one after iobref_add, so when we unref that we'll free the iobuf as
+ * well. This allows us to pass just the iobref as frame->local.
+ */
+ iobuf_unref(iobuf);
+ /* Set the pointer to null so we don't free it on a later error. */
+ iobuf = NULL;
+
+ /* Create the xdr payload */
+ ret = xdr_serialize_generic(iov, req, (xdrproc_t)xdr_gd1_mgmt_brick_op_req);
+ if (ret == -1) {
+ goto *errlbl;
+ }
+ iov.iov_len = ret;
+
+ /* Send the msg */
+ GF_ATOMIC_INC(conf->blockers);
+ ret = rpc_clnt_submit(rpc, &gd_brick_prog, op, cbkfn, &iov, 1, NULL, 0,
+ iobref, frame, NULL, 0, NULL, 0, NULL);
+ if (dict)
+ dict_unref(dict);
+ GF_FREE(volfile_content);
+ if (spec_fd >= 0)
+ sys_close(spec_fd);
+ return ret;
+
+free_iobref:
+ iobref_unref(iobref);
+maybe_free_iobuf:
+ if (iobuf) {
+ iobuf_unref(iobuf);
+ }
+err:
+ if (dict)
+ dict_unref(dict);
+ if (brick_req.dict.dict_val)
+ GF_FREE(brick_req.dict.dict_val);
+
+ GF_FREE(volfile_content);
+ if (spec_fd >= 0)
+ sys_close(spec_fd);
+ if (frame)
+ STACK_DESTROY(frame->root);
+ return -1;
+}
+
+/*
+ * Ask the already running daemon process to attach the graph of @svc.
+ *
+ * A reference is taken on svc->conn.rpc and the attach request is submitted
+ * through it under conf->attach_lock. Submission is attempted up to 15
+ * times, sleeping one second between attempts with conf->big_lock released.
+ *
+ * @svc:     service to attach
+ * @volinfo: volume the service belongs to; shd.attached is set on success
+ * @flags:   forwarded unchanged to __glusterd_send_svc_configure_req()
+ *
+ * Returns 0 when the request was submitted, or when the volume no longer
+ * exists (nothing left to attach); -1 when an argument fails validation or
+ * every attempt failed.
+ */
+int
+glusterd_attach_svc(glusterd_svc_t *svc, glusterd_volinfo_t *volinfo, int flags)
+{
+    glusterd_conf_t *conf = THIS->private;
+    rpc_clnt_t *rpc = NULL;
+    int attempts_left = 15;
+    int ret = -1;
+
+    GF_VALIDATE_OR_GOTO("glusterd", conf, out);
+    GF_VALIDATE_OR_GOTO("glusterd", svc, out);
+    GF_VALIDATE_OR_GOTO("glusterd", volinfo, out);
+
+    gf_msg("glusterd", GF_LOG_INFO, 0, GD_MSG_ATTACH_INFO,
+           "adding svc %s (volume=%s) to existing "
+           "process with pid %d",
+           svc->name, volinfo->volname, glusterd_proc_get_pid(&svc->proc));
+
+    rpc = rpc_clnt_ref(svc->conn.rpc);
+    while (attempts_left > 0) {
+        /* The volinfo was handed to us before any yield, so the volume may
+         * have gone stale (be in the process of deletion) in the meantime.
+         * Re-check its existence after every sync task switch.
+         */
+        if (!glusterd_volume_exists(volinfo->volname)) {
+            gf_msg(THIS->name, GF_LOG_INFO, 0, GD_MSG_SVC_ATTACH_FAIL,
+                   "Volume %s "
+                   " is marked as stale, not attempting further shd svc attach "
+                   "attempts",
+                   volinfo->volname);
+            ret = 0;
+            goto out;
+        }
+
+        if (rpc != NULL) {
+            pthread_mutex_lock(&conf->attach_lock);
+            ret = __glusterd_send_svc_configure_req(svc, flags, rpc,
+                                                    svc->proc.volfileid,
+                                                    GLUSTERD_SVC_ATTACH);
+            pthread_mutex_unlock(&conf->attach_lock);
+
+            if (ret == 0) {
+                volinfo->shd.attached = _gf_true;
+                goto out;
+            }
+        }
+
+        /*
+         * Dropping the big lock here may not be strictly safe, but
+         * without doing so the connection can never complete and the
+         * retries would be pointless. The alternatives (e.g. moving
+         * all of this to a separate thread) are considerably more
+         * complicated and risky.
+         * TBD: see if there's a better way
+         */
+        synclock_unlock(&conf->big_lock);
+        synctask_sleep(1);
+        synclock_lock(&conf->big_lock);
+
+        --attempts_left;
+    }
+
+    /* Every attempt was used up without a successful submission. */
+    ret = -1;
+    gf_msg("glusterd", GF_LOG_WARNING, 0, GD_MSG_SVC_ATTACH_FAIL,
+           "attach failed for %s(volume=%s)", svc->name, volinfo->volname);
+out:
+    if (rpc)
+        rpc_clnt_unref(rpc);
+    return ret;
+}
+
+/*
+ * Ask the running daemon process to detach the graph of @svc.
+ *
+ * A reference is taken on svc->conn.rpc and the detach request is submitted
+ * through that referenced handle under conf->attach_lock. Submission is
+ * attempted up to 15 times, sleeping one second between attempts with
+ * conf->big_lock released.
+ *
+ * @svc:     service to detach
+ * @volinfo: volume the service belongs to (used for logging only)
+ * @sig:     currently unused
+ *
+ * Returns 0 when the request was submitted, -1 when an argument fails
+ * validation or every attempt failed.
+ */
+int
+glusterd_detach_svc(glusterd_svc_t *svc, glusterd_volinfo_t *volinfo, int sig)
+{
+    glusterd_conf_t *conf = THIS->private;
+    int ret = -1;
+    int tries;
+    rpc_clnt_t *rpc = NULL;
+
+    GF_VALIDATE_OR_GOTO(THIS->name, conf, out);
+    GF_VALIDATE_OR_GOTO(THIS->name, svc, out);
+    GF_VALIDATE_OR_GOTO(THIS->name, volinfo, out);
+
+    gf_msg(THIS->name, GF_LOG_INFO, 0, GD_MSG_DETACH_INFO,
+           "removing svc %s (volume=%s) from existing "
+           "process with pid %d",
+           svc->name, volinfo->volname, glusterd_proc_get_pid(&svc->proc));
+
+    rpc = rpc_clnt_ref(svc->conn.rpc);
+    for (tries = 15; tries > 0; --tries) {
+        if (rpc) {
+            /* For detach there are no flags, and we are not using sig. */
+            pthread_mutex_lock(&conf->attach_lock);
+            {
+                /* Submit through the handle we hold a reference on, not
+                 * through svc->conn.rpc: big_lock is released between
+                 * retries, so that field is not guaranteed to still be
+                 * the pointer (or even non-NULL) that was checked above.
+                 */
+                ret = __glusterd_send_svc_configure_req(svc, 0, rpc,
+                                                        svc->proc.volfileid,
+                                                        GLUSTERD_SVC_DETACH);
+            }
+            pthread_mutex_unlock(&conf->attach_lock);
+            if (!ret) {
+                goto out;
+            }
+        }
+        /*
+         * It might not actually be safe to manipulate the lock
+         * like this, but if we don't then the connection can
+         * never actually complete and retries are useless.
+         * Unfortunately, all of the alternatives (e.g. doing
+         * all of this in a separate thread) are much more
+         * complicated and risky.
+         * TBD: see if there's a better way
+         */
+        synclock_unlock(&conf->big_lock);
+        synctask_sleep(1);
+        synclock_lock(&conf->big_lock);
+    }
+    /* Every attempt was used up without a successful submission. */
+    ret = -1;
+    gf_msg("glusterd", GF_LOG_WARNING, 0, GD_MSG_SVC_DETACH_FAIL,
+           "detach failed for %s(volume=%s)", svc->name, volinfo->volname);
+out:
+    if (rpc)
+        rpc_clnt_unref(rpc);
+    return ret;
+}
diff --git a/xlators/mgmt/glusterd/src/glusterd-svc-helper.h b/xlators/mgmt/glusterd/src/glusterd-svc-helper.h
index cc98e788bbe..12717dc58ac 100644
--- a/xlators/mgmt/glusterd/src/glusterd-svc-helper.h
+++ b/xlators/mgmt/glusterd/src/glusterd-svc-helper.h
@@ -16,10 +16,10 @@
#include "glusterd-volgen.h"
int
-glusterd_svcs_reconfigure();
+glusterd_svcs_reconfigure(glusterd_volinfo_t *volinfo);
int
-glusterd_svcs_stop();
+glusterd_svcs_stop(glusterd_volinfo_t *vol);
int
glusterd_svcs_manager(glusterd_volinfo_t *volinfo);
@@ -32,14 +32,41 @@ int
glusterd_svc_check_topology_identical(char *svc_name,
glusterd_graph_builder_t builder,
gf_boolean_t *identical);
+int
+glusterd_volume_svc_check_volfile_identical(char *svc_name, dict_t *mode_dict,
+ glusterd_volinfo_t *volinfo,
+ glusterd_vol_graph_builder_t,
+ gf_boolean_t *identical);
+int
+glusterd_volume_svc_check_topology_identical(char *svc_name, dict_t *mode_dict,
+ glusterd_volinfo_t *volinfo,
+ glusterd_vol_graph_builder_t,
+ gf_boolean_t *identical);
+void
+glusterd_volume_svc_build_volfile_path(char *server, glusterd_volinfo_t *vol,
+ char *volfile, size_t len);
+void *
+__gf_find_compatible_svc(gd_node_type daemon);
+
+glusterd_svc_proc_t *
+glusterd_svcprocess_new();
int
-glusterd_svc_check_tier_volfile_identical(char *svc_name,
- glusterd_volinfo_t *volinfo,
- gf_boolean_t *identical);
+glusterd_shd_svc_mux_init(glusterd_volinfo_t *volinfo, glusterd_svc_t *svc);
+
+void *
+__gf_find_compatible_svc_from_pid(gd_node_type daemon, pid_t pid);
+
+int
+glusterd_attach_svc(glusterd_svc_t *svc, glusterd_volinfo_t *volinfo,
+ int flags);
+
+int
+glusterd_detach_svc(glusterd_svc_t *svc, glusterd_volinfo_t *volinfo, int sig);
+
int
-glusterd_svc_check_tier_topology_identical(char *svc_name,
- glusterd_volinfo_t *volinfo,
- gf_boolean_t *identical);
+__glusterd_send_svc_configure_req(glusterd_svc_t *svc, int flag,
+ struct rpc_clnt *rpc, char *volfile_id,
+ int op);
#endif
diff --git a/xlators/mgmt/glusterd/src/glusterd-svc-mgmt.c b/xlators/mgmt/glusterd/src/glusterd-svc-mgmt.c
index 9954605f6e3..18b3fb13630 100644
--- a/xlators/mgmt/glusterd/src/glusterd-svc-mgmt.c
+++ b/xlators/mgmt/glusterd/src/glusterd-svc-mgmt.c
@@ -8,23 +8,24 @@
cases as published by the Free Software Foundation.
*/
-#include "globals.h"
-#include "run.h"
+#include <glusterfs/globals.h>
+#include <glusterfs/run.h>
#include "glusterd.h"
-#include "glusterfs.h"
+#include <glusterfs/glusterfs.h>
#include "glusterd-utils.h"
#include "glusterd-svc-mgmt.h"
#include "glusterd-proc-mgmt.h"
#include "glusterd-conn-mgmt.h"
#include "glusterd-messages.h"
-#include "syscall.h"
+#include <glusterfs/syscall.h>
+#include "glusterd-shd-svc-helper.h"
int
glusterd_svc_create_rundir(char *rundir)
{
int ret = -1;
- ret = mkdir_p(rundir, 0777, _gf_true);
+ ret = mkdir_p(rundir, 0755, _gf_true);
if ((ret == -1) && (EEXIST != errno)) {
gf_msg(THIS->name, GF_LOG_ERROR, errno, GD_MSG_CREATE_DIR_FAILED,
"Unable to create rundir %s", rundir);
@@ -32,14 +33,14 @@ glusterd_svc_create_rundir(char *rundir)
return ret;
}
-static void
+void
glusterd_svc_build_logfile_path(char *server, char *logdir, char *logfile,
size_t len)
{
snprintf(logfile, len, "%s/%s.log", logdir, server);
}
-static void
+void
glusterd_svc_build_volfileid_path(char *server, char *volfileid, size_t len)
{
snprintf(volfileid, len, "gluster/%s", server);
@@ -143,7 +144,7 @@ glusterd_svc_init(glusterd_svc_t *svc, char *svc_name)
glusterd_svc_build_rundir(svc_name, priv->rundir, rundir, sizeof(rundir));
ret = glusterd_svc_init_common(svc, svc_name, priv->workdir, rundir,
- DEFAULT_LOG_FILE_DIRECTORY, NULL);
+ priv->logdir, NULL);
return ret;
}
@@ -161,74 +162,92 @@ glusterd_svc_start(glusterd_svc_t *svc, int flags, dict_t *cmdline)
char *localtime_logging = NULL;
char *log_level = NULL;
char daemon_log_level[30] = {0};
+ char msg[1024] = {
+ 0,
+ };
int32_t len = 0;
this = THIS;
GF_ASSERT(this);
priv = this->private;
- GF_ASSERT(priv);
+ GF_VALIDATE_OR_GOTO("glusterd", priv, out);
+ GF_VALIDATE_OR_GOTO("glusterd", svc, out);
+
+ pthread_mutex_lock(&priv->attach_lock);
+ {
+ if (glusterd_proc_is_running(&(svc->proc))) {
+ ret = 0;
+ goto unlock;
+ }
- if (glusterd_proc_is_running(&(svc->proc))) {
- ret = 0;
- goto out;
- }
+ ret = sys_access(svc->proc.volfile, F_OK);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLFILE_NOT_FOUND,
+ "Volfile %s is not present", svc->proc.volfile);
+ goto unlock;
+ }
- ret = sys_access(svc->proc.volfile, F_OK);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLFILE_NOT_FOUND,
- "Volfile %s is not present", svc->proc.volfile);
- goto out;
- }
+ runinit(&runner);
- runinit(&runner);
+ if (this->ctx->cmd_args.vgtool != _gf_none) {
+ len = snprintf(valgrind_logfile, PATH_MAX, "%s/valgrind-%s.log",
+ svc->proc.logdir, svc->name);
+ if ((len < 0) || (len >= PATH_MAX)) {
+ ret = -1;
+ goto unlock;
+ }
+
+ if (this->ctx->cmd_args.vgtool == _gf_memcheck)
+ runner_add_args(&runner, "valgrind", "--leak-check=full",
+ "--trace-children=yes", "--track-origins=yes",
+ NULL);
+ else
+ runner_add_args(&runner, "valgrind", "--tool=drd", NULL);
- if (this->ctx->cmd_args.valgrind) {
- len = snprintf(valgrind_logfile, PATH_MAX, "%s/valgrind-%s.log",
- svc->proc.logfile, svc->name);
- if ((len < 0) || (len >= PATH_MAX)) {
- ret = -1;
- goto out;
+ runner_argprintf(&runner, "--log-file=%s", valgrind_logfile);
}
- runner_add_args(&runner, "valgrind", "--leak-check=full",
- "--trace-children=yes", "--track-origins=yes", NULL);
- runner_argprintf(&runner, "--log-file=%s", valgrind_logfile);
- }
+ runner_add_args(&runner, SBIN_DIR "/glusterfs", "-s",
+ svc->proc.volfileserver, "--volfile-id",
+ svc->proc.volfileid, "-p", svc->proc.pidfile, "-l",
+ svc->proc.logfile, "-S", svc->conn.sockpath, NULL);
- runner_add_args(&runner, SBIN_DIR "/glusterfs", "-s",
- svc->proc.volfileserver, "--volfile-id",
- svc->proc.volfileid, "-p", svc->proc.pidfile, "-l",
- svc->proc.logfile, "-S", svc->conn.sockpath, NULL);
+ if (dict_get_strn(priv->opts, GLUSTERD_LOCALTIME_LOGGING_KEY,
+ SLEN(GLUSTERD_LOCALTIME_LOGGING_KEY),
+ &localtime_logging) == 0) {
+ if (strcmp(localtime_logging, "enable") == 0)
+ runner_add_arg(&runner, "--localtime-logging");
+ }
+ if (dict_get_strn(priv->opts, GLUSTERD_DAEMON_LOG_LEVEL_KEY,
+ SLEN(GLUSTERD_DAEMON_LOG_LEVEL_KEY),
+ &log_level) == 0) {
+ snprintf(daemon_log_level, 30, "--log-level=%s", log_level);
+ runner_add_arg(&runner, daemon_log_level);
+ }
- if (dict_get_strn(priv->opts, GLUSTERD_LOCALTIME_LOGGING_KEY,
- SLEN(GLUSTERD_LOCALTIME_LOGGING_KEY),
- &localtime_logging) == 0) {
- if (strcmp(localtime_logging, "enable") == 0)
- runner_add_arg(&runner, "--localtime-logging");
- }
- if (dict_get_strn(priv->opts, GLUSTERD_DAEMON_LOG_LEVEL_KEY,
- SLEN(GLUSTERD_DAEMON_LOG_LEVEL_KEY), &log_level) == 0) {
- snprintf(daemon_log_level, 30, "--log-level=%s", log_level);
- runner_add_arg(&runner, daemon_log_level);
- }
+ if (this->ctx->cmd_args.global_threading) {
+ runner_add_arg(&runner, "--global-threading");
+ }
- if (cmdline)
- dict_foreach(cmdline, svc_add_args, (void *)&runner);
+ if (cmdline)
+ dict_foreach(cmdline, svc_add_args, (void *)&runner);
- gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_SVC_START_SUCCESS,
- "Starting %s service", svc->name);
+ snprintf(msg, sizeof(msg), "Starting %s service", svc->name);
+ runner_log(&runner, this->name, GF_LOG_DEBUG, msg);
- if (flags == PROC_START_NO_WAIT) {
- ret = runner_run_nowait(&runner);
- } else {
- synclock_unlock(&priv->big_lock);
- {
- ret = runner_run(&runner);
+ if (flags == PROC_START_NO_WAIT) {
+ ret = runner_run_nowait(&runner);
+ } else {
+ synclock_unlock(&priv->big_lock);
+ {
+ ret = runner_run(&runner);
+ }
+ synclock_lock(&priv->big_lock);
}
- synclock_lock(&priv->big_lock);
}
-
+unlock:
+ pthread_mutex_unlock(&priv->attach_lock);
out:
gf_msg_debug(this->name, 0, "Returning %d", ret);
@@ -281,7 +300,8 @@ glusterd_svc_build_volfile_path(char *server, char *workdir, char *volfile,
glusterd_svc_build_svcdir(server, workdir, dir, sizeof(dir));
- if (!strcmp(server, "quotad")) /*quotad has different volfile name*/
+ if (!strcmp(server, "quotad"))
+ /*quotad has different volfile name*/
snprintf(volfile, len, "%s/%s.vol", dir, server);
else
snprintf(volfile, len, "%s/%s-server.vol", dir, server);
@@ -366,3 +386,151 @@ glusterd_svc_common_rpc_notify(glusterd_conn_t *conn, rpc_clnt_event_t event)
return ret;
}
+
+/*
+ * Build the per-volume volfile path of daemon @server into @volfile.
+ *
+ * Only "glustershd" is handled, by delegating to
+ * glusterd_svc_build_shd_volfile_path(); for any other daemon name
+ * @volfile is left untouched. @len is checked against PATH_MAX with
+ * GF_ASSERT.
+ */
+void
+glusterd_volume_svc_build_volfile_path(char *server, glusterd_volinfo_t *vol,
+                                       char *volfile, size_t len)
+{
+    GF_ASSERT(len == PATH_MAX);
+
+    /* Nothing to build for daemons other than the self-heal daemon. */
+    if (strcmp(server, "glustershd") != 0)
+        return;
+
+    glusterd_svc_build_shd_volfile_path(vol, volfile, len);
+}
+
+/*
+ * Common RPC event handler for a multiplexed service process.
+ *
+ * On RPC_CLNT_CONNECT every service on mux_proc->svcs is marked online and
+ * the process status becomes GF_SVC_STARTED. On RPC_CLNT_DISCONNECT every
+ * service is marked offline and, unless the process was already recorded as
+ * GF_SVC_DIED, the status becomes GF_SVC_DIED or GF_SVC_DISCONNECTED
+ * depending on whether the process of the first attached service is still
+ * running. Other events are only traced.
+ *
+ * Returns -1 when @mux_proc is NULL, 0 otherwise.
+ */
+int
+glusterd_muxsvc_common_rpc_notify(glusterd_svc_proc_t *mux_proc,
+                                  rpc_clnt_event_t event)
+{
+    int ret = 0;
+    glusterd_svc_t *svc = NULL;
+    glusterd_svc_t *tmp = NULL;
+    xlator_t *this = NULL;
+    gf_boolean_t need_logging = _gf_false;
+
+    this = THIS;
+    GF_ASSERT(this);
+
+    if (!mux_proc) {
+        gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SVC_GET_FAIL,
+               "Failed to get the svc proc data");
+        return -1;
+    }
+
+    /* Currently this function is used only for the shd svc. If it is ever
+     * used for another svc, change the glustershd references below; the
+     * svc name can be obtained from any of the attached svcs.
+     */
+    switch (event) {
+        case RPC_CLNT_CONNECT:
+            gf_msg_debug(this->name, 0,
+                         "glustershd has connected with glusterd.");
+            gf_event(EVENT_SVC_CONNECTED, "svc_name=glustershd");
+            cds_list_for_each_entry_safe(svc, tmp, &mux_proc->svcs, mux_svc)
+            {
+                if (svc->online)
+                    continue;
+                svc->online = _gf_true;
+            }
+            if (mux_proc->status != GF_SVC_STARTED)
+                mux_proc->status = GF_SVC_STARTED;
+
+            break;
+
+        case RPC_CLNT_DISCONNECT:
+            cds_list_for_each_entry_safe(svc, tmp, &mux_proc->svcs, mux_svc)
+            {
+                if (svc->online) {
+                    /* Log the disconnect only if something was online. */
+                    need_logging = _gf_true;
+                    svc->online = _gf_false;
+                }
+            }
+            if (mux_proc->status != GF_SVC_DIED) {
+                /* cds_list_entry() on an empty list would yield a pointer
+                 * computed from the list head itself rather than a real
+                 * service, so only look at the first entry when there is
+                 * one. With no service attached the liveness of the
+                 * process cannot be checked; treat it as disconnected.
+                 */
+                if (cds_list_empty(&mux_proc->svcs))
+                    svc = NULL;
+                else
+                    svc = cds_list_entry(mux_proc->svcs.next, glusterd_svc_t,
+                                         mux_svc);
+
+                if (svc && !glusterd_proc_is_running(&svc->proc)) {
+                    mux_proc->status = GF_SVC_DIED;
+                } else {
+                    mux_proc->status = GF_SVC_DISCONNECTED;
+                }
+            }
+
+            if (need_logging) {
+                gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_NODE_DISCONNECTED,
+                       "glustershd has disconnected from glusterd.");
+                gf_event(EVENT_SVC_DISCONNECTED, "svc_name=glustershd");
+            }
+            break;
+
+        default:
+            gf_msg_trace(this->name, 0, "got some other RPC event %d", event);
+            break;
+    }
+
+    return ret;
+}
+
+/*
+ * Set up the unix-socket RPC client of a multiplexed service connection.
+ *
+ * Builds transport options for @sockpath / @frame_timeout, creates an rpc
+ * client named after the service that embeds @conn, and registers
+ * glusterd_muxsvc_conn_common_notify with @mux_proc as its data. On success
+ * the socket path, frame timeout and rpc handle are stored in @conn and
+ * @notify is stored in @mux_proc.
+ *
+ * Returns 0 on success and a non-zero value on failure; on failure any rpc
+ * client created here is unreferenced. The local reference on the options
+ * dict is dropped on every path.
+ */
+int
+glusterd_muxsvc_conn_init(glusterd_conn_t *conn, glusterd_svc_proc_t *mux_proc,
+                          char *sockpath, int frame_timeout,
+                          glusterd_muxsvc_conn_notify_t notify)
+{
+    xlator_t *this = THIS;
+    dict_t *opts = dict_new();
+    struct rpc_clnt *clnt = NULL;
+    glusterd_svc_t *owner = NULL;
+    int ret = -1;
+
+    if (!this || !opts)
+        goto out;
+
+    /* @conn is embedded in a glusterd_svc_t; recover the enclosing svc. */
+    owner = cds_list_entry(conn, glusterd_svc_t, conn);
+    if (owner == NULL) {
+        gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SVC_GET_FAIL,
+               "Failed to get the service");
+        goto out;
+    }
+
+    ret = rpc_transport_unix_options_build(opts, sockpath, frame_timeout);
+    if (ret != 0)
+        goto out;
+
+    ret = dict_set_int32n(opts, "transport.socket.ignore-enoent",
+                          SLEN("transport.socket.ignore-enoent"), 1);
+    if (ret != 0)
+        goto out;
+
+    /* @opts is free'd by rpc_transport when destroyed */
+    clnt = rpc_clnt_new(opts, this, (char *)owner->name, 16);
+    if (clnt == NULL) {
+        ret = -1;
+        goto out;
+    }
+
+    ret = rpc_clnt_register_notify(clnt, glusterd_muxsvc_conn_common_notify,
+                                   mux_proc);
+    if (ret != 0)
+        goto out;
+
+    /* A negative snprintf result is returned to the caller as is. */
+    ret = snprintf(conn->sockpath, sizeof(conn->sockpath), "%s", sockpath);
+    if (ret < 0)
+        goto out;
+    ret = 0;
+
+    conn->frame_timeout = frame_timeout;
+    conn->rpc = clnt;
+    mux_proc->notify = notify;
+out:
+    if (opts)
+        dict_unref(opts);
+    /* Do not leak the client if anything after its creation failed. */
+    if (ret && clnt)
+        rpc_clnt_unref(clnt);
+    return ret;
+}
diff --git a/xlators/mgmt/glusterd/src/glusterd-svc-mgmt.h b/xlators/mgmt/glusterd/src/glusterd-svc-mgmt.h
index c850bfda68f..5daee993833 100644
--- a/xlators/mgmt/glusterd/src/glusterd-svc-mgmt.h
+++ b/xlators/mgmt/glusterd/src/glusterd-svc-mgmt.h
@@ -13,9 +13,12 @@
#include "glusterd-proc-mgmt.h"
#include "glusterd-conn-mgmt.h"
+#include "glusterd-rcu.h"
struct glusterd_svc_;
+
typedef struct glusterd_svc_ glusterd_svc_t;
+typedef struct glusterd_svc_proc_ glusterd_svc_proc_t;
typedef void (*glusterd_svc_build_t)(glusterd_svc_t *svc);
@@ -25,16 +28,38 @@ typedef int (*glusterd_svc_start_t)(glusterd_svc_t *svc, int flags);
typedef int (*glusterd_svc_stop_t)(glusterd_svc_t *svc, int sig);
typedef int (*glusterd_svc_reconfigure_t)(void *data);
+typedef int (*glusterd_muxsvc_conn_notify_t)(glusterd_svc_proc_t *mux_proc,
+ rpc_clnt_event_t event);
+
+/* State recorded in glusterd_svc_proc_t.status for a multiplexed service
+ * process. */
+typedef enum gf_svc_status {
+ GF_SVC_STARTING,
+ /* Set by glusterd_muxsvc_common_rpc_notify() on RPC_CLNT_CONNECT. */
+ GF_SVC_STARTED,
+ GF_SVC_STOPPING,
+ /* Set on RPC_CLNT_DISCONNECT when the process is not found dead. */
+ GF_SVC_DISCONNECTED,
+ /* Set on RPC_CLNT_DISCONNECT when the process is no longer running. */
+ GF_SVC_DIED,
+} gf_svc_status_t;
+
+/* Bookkeeping for one daemon process that serves several glusterd_svc_t
+ * instances (service multiplexing). */
+struct glusterd_svc_proc_ {
+ /* List linkage for this process object.
+  * NOTE(review): presumably links into a global list of mux processes;
+  * the owning list head is not visible here - confirm. */
+ struct cds_list_head svc_proc_list;
+ /* Head of the attached services, linked through glusterd_svc_t.mux_svc. */
+ struct cds_list_head svcs;
+ /* Event callback; stored by glusterd_muxsvc_conn_init(). */
+ glusterd_muxsvc_conn_notify_t notify;
+ rpc_clnt_t *rpc;
+ void *data;
+ /* Updated by glusterd_muxsvc_common_rpc_notify() on connect/disconnect. */
+ gf_svc_status_t status;
+};
+
struct glusterd_svc_ {
- char name[NAME_MAX];
glusterd_conn_t conn;
- glusterd_proc_t proc;
glusterd_svc_manager_t manager;
glusterd_svc_start_t start;
glusterd_svc_stop_t stop;
+ glusterd_svc_reconfigure_t reconfigure;
+ glusterd_svc_proc_t *svc_proc;
+ struct cds_list_head mux_svc;
+ glusterd_proc_t proc;
+ char name[NAME_MAX];
gf_boolean_t online;
gf_boolean_t inited;
- glusterd_svc_reconfigure_t reconfigure;
};
int
@@ -58,6 +83,10 @@ glusterd_svc_build_volfile_path(char *server, char *workdir, char *volfile,
size_t len);
void
+glusterd_svc_build_logfile_path(char *server, char *logdir, char *logfile,
+ size_t len);
+
+void
glusterd_svc_build_svcdir(char *server, char *workdir, char *path, size_t len);
void
@@ -69,4 +98,15 @@ glusterd_svc_reconfigure(int (*create_volfile)());
int
glusterd_svc_common_rpc_notify(glusterd_conn_t *conn, rpc_clnt_event_t event);
+int
+glusterd_muxsvc_common_rpc_notify(glusterd_svc_proc_t *conn,
+ rpc_clnt_event_t event);
+
+int
+glusterd_proc_get_pid(glusterd_proc_t *proc);
+
+int
+glusterd_muxsvc_conn_init(glusterd_conn_t *conn, glusterd_svc_proc_t *mux_proc,
+ char *sockpath, int frame_timeout,
+ glusterd_muxsvc_conn_notify_t notify);
#endif
diff --git a/xlators/mgmt/glusterd/src/glusterd-syncop.c b/xlators/mgmt/glusterd/src/glusterd-syncop.c
index 0bf03358ffd..b73d37ad08e 100644
--- a/xlators/mgmt/glusterd/src/glusterd-syncop.c
+++ b/xlators/mgmt/glusterd/src/glusterd-syncop.c
@@ -52,13 +52,13 @@ gd_collate_errors(struct syncargs *args, int op_ret, int op_errno,
args->op_ret = op_ret;
args->op_errno = op_errno;
- rcu_read_lock();
+ RCU_READ_LOCK;
peerinfo = glusterd_peerinfo_find(peerid, NULL);
if (peerinfo)
peer_str = gf_strdup(peerinfo->hostname);
else
peer_str = gf_strdup(uuid_utoa(uuid));
- rcu_read_unlock();
+ RCU_READ_UNLOCK;
if (op_errstr && strcmp(op_errstr, "")) {
len = snprintf(err_str, sizeof(err_str) - 1, "Error: %s",
@@ -143,6 +143,8 @@ gd_brick_op_req_free(gd1_mgmt_brick_op_req *req)
if (!req)
return;
+ if (req->dict.dict_val)
+ GF_FREE(req->dict.dict_val);
GF_FREE(req->input.input_val);
GF_FREE(req);
}
@@ -228,7 +230,6 @@ glusterd_syncop_aggr_rsp_dict(glusterd_op_t op, dict_t *aggr, dict_t *rsp)
case GD_OP_CREATE_VOLUME:
case GD_OP_ADD_BRICK:
case GD_OP_START_VOLUME:
- case GD_OP_ADD_TIER_BRICK:
ret = glusterd_aggr_brick_mount_dirs(aggr, rsp);
if (ret) {
gf_msg(this->name, GF_LOG_ERROR, 0,
@@ -309,11 +310,15 @@ glusterd_syncop_aggr_rsp_dict(glusterd_op_t op, dict_t *aggr, dict_t *rsp)
ret = glusterd_max_opversion_use_rsp_dict(aggr, rsp);
break;
- case GD_OP_TIER_STATUS:
- case GD_OP_DETACH_TIER_STATUS:
- case GD_OP_REMOVE_TIER_BRICK:
- ret = glusterd_volume_tier_use_rsp_dict(aggr, rsp);
- /* FALLTHROUGH */
+ case GD_OP_PROFILE_VOLUME:
+ ret = glusterd_profile_volume_use_rsp_dict(aggr, rsp);
+ break;
+
+ case GD_OP_REBALANCE:
+ case GD_OP_DEFRAG_BRICK_VOLUME:
+ ret = glusterd_volume_rebalance_use_rsp_dict(aggr, rsp);
+ break;
+
default:
break;
}
@@ -401,8 +406,11 @@ gd_syncop_mgmt_v3_lock(glusterd_op_t op, dict_t *op_ctx,
ret = dict_allocate_and_serialize(op_ctx, &req.dict.dict_val,
&req.dict.dict_len);
- if (ret)
+ if (ret) {
+ gf_smsg("glusterd", GF_LOG_ERROR, errno,
+ GD_MSG_DICT_ALLOC_AND_SERL_LENGTH_GET_FAIL, NULL);
goto out;
+ }
gf_uuid_copy(req.uuid, my_uuid);
gf_uuid_copy(req.txn_id, txn_id);
@@ -502,8 +510,11 @@ gd_syncop_mgmt_v3_unlock(dict_t *op_ctx, glusterd_peerinfo_t *peerinfo,
ret = dict_allocate_and_serialize(op_ctx, &req.dict.dict_val,
&req.dict.dict_len);
- if (ret)
+ if (ret) {
+ gf_smsg("glusterd", GF_LOG_ERROR, errno,
+ GD_MSG_DICT_ALLOC_AND_SERL_LENGTH_GET_FAIL, NULL);
goto out;
+ }
gf_uuid_copy(req.uuid, my_uuid);
gf_uuid_copy(req.txn_id, txn_id);
@@ -560,20 +571,21 @@ _gd_syncop_mgmt_lock_cbk(struct rpc_req *req, struct iovec *iov, int count,
gf_uuid_copy(args->uuid, rsp.uuid);
- rcu_read_lock();
+ RCU_READ_LOCK;
peerinfo = glusterd_peerinfo_find(*peerid, NULL);
if (peerinfo) {
/* Set peer as locked, so we unlock only the locked peers */
if (rsp.op_ret == 0)
peerinfo->locked = _gf_true;
+ RCU_READ_UNLOCK;
} else {
+ RCU_READ_UNLOCK;
rsp.op_ret = -1;
gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_PEER_NOT_FOUND,
"Could not find peer with "
"ID %s",
uuid_utoa(*peerid));
}
- rcu_read_unlock();
op_ret = rsp.op_ret;
op_errno = rsp.op_errno;
@@ -661,18 +673,19 @@ _gd_syncop_mgmt_unlock_cbk(struct rpc_req *req, struct iovec *iov, int count,
gf_uuid_copy(args->uuid, rsp.uuid);
- rcu_read_lock();
+ RCU_READ_LOCK;
peerinfo = glusterd_peerinfo_find(*peerid, NULL);
if (peerinfo) {
peerinfo->locked = _gf_false;
+ RCU_READ_UNLOCK;
} else {
+ RCU_READ_UNLOCK;
rsp.op_ret = -1;
gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_PEER_NOT_FOUND,
"Could not find peer with "
"ID %s",
uuid_utoa(*peerid));
}
- rcu_read_unlock();
op_ret = rsp.op_ret;
op_errno = rsp.op_errno;
@@ -770,9 +783,9 @@ _gd_syncop_stage_op_cbk(struct rpc_req *req, struct iovec *iov, int count,
}
}
- rcu_read_lock();
+ RCU_READ_LOCK;
ret = (glusterd_peerinfo_find(rsp.uuid, NULL) == NULL);
- rcu_read_unlock();
+ RCU_READ_UNLOCK;
if (ret) {
ret = -1;
gf_msg(this->name, GF_LOG_CRITICAL, 0, GD_MSG_RESP_FROM_UNKNOWN_PEER,
@@ -835,16 +848,21 @@ gd_syncop_mgmt_stage_op(glusterd_peerinfo_t *peerinfo, struct syncargs *args,
uuid_t *peerid = NULL;
req = GF_CALLOC(1, sizeof(*req), gf_gld_mt_mop_stage_req_t);
- if (!req)
+ if (!req) {
+ gf_smsg("glusterd", GF_LOG_ERROR, errno, GD_MSG_NO_MEMORY, NULL);
goto out;
+ }
gf_uuid_copy(req->uuid, my_uuid);
req->op = op;
ret = dict_allocate_and_serialize(dict_out, &req->buf.buf_val,
&req->buf.buf_len);
- if (ret)
+ if (ret) {
+ gf_smsg("glusterd", GF_LOG_ERROR, errno,
+ GD_MSG_DICT_ALLOC_AND_SERL_LENGTH_GET_FAIL, NULL);
goto out;
+ }
GD_ALLOC_COPY_UUID(peerid, peerinfo->uuid, ret);
if (ret)
@@ -896,6 +914,8 @@ _gd_syncop_brick_op_cbk(struct rpc_req *req, struct iovec *iov, int count,
if (rsp.output.output_len) {
args->dict = dict_new();
if (!args->dict) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL,
+ NULL);
ret = -1;
args->op_errno = ENOMEM;
goto out;
@@ -903,8 +923,11 @@ _gd_syncop_brick_op_cbk(struct rpc_req *req, struct iovec *iov, int count,
ret = dict_unserialize(rsp.output.output_val, rsp.output.output_len,
&args->dict);
- if (ret < 0)
+ if (ret < 0) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ GD_MSG_DICT_UNSERIALIZE_FAIL, NULL);
goto out;
+ }
}
args->op_ret = rsp.op_ret;
@@ -1072,9 +1095,9 @@ _gd_syncop_commit_op_cbk(struct rpc_req *req, struct iovec *iov, int count,
}
}
- rcu_read_lock();
+ RCU_READ_LOCK;
ret = (glusterd_peerinfo_find(rsp.uuid, NULL) == 0);
- rcu_read_unlock();
+ RCU_READ_UNLOCK;
if (ret) {
ret = -1;
gf_msg(this->name, GF_LOG_CRITICAL, 0, GD_MSG_RESP_FROM_UNKNOWN_PEER,
@@ -1145,16 +1168,21 @@ gd_syncop_mgmt_commit_op(glusterd_peerinfo_t *peerinfo, struct syncargs *args,
uuid_t *peerid = NULL;
req = GF_CALLOC(1, sizeof(*req), gf_gld_mt_mop_commit_req_t);
- if (!req)
+ if (!req) {
+ gf_smsg("glusterd", GF_LOG_ERROR, errno, GD_MSG_NO_MEMORY, NULL);
goto out;
+ }
gf_uuid_copy(req->uuid, my_uuid);
req->op = op;
ret = dict_allocate_and_serialize(dict_out, &req->buf.buf_val,
&req->buf.buf_len);
- if (ret)
+ if (ret) {
+ gf_smsg("glusterd", GF_LOG_ERROR, errno,
+ GD_MSG_DICT_ALLOC_AND_SERL_LENGTH_GET_FAIL, NULL);
goto out;
+ }
GD_ALLOC_COPY_UUID(peerid, peerinfo->uuid, ret);
if (ret)
@@ -1182,10 +1210,15 @@ gd_lock_op_phase(glusterd_conf_t *conf, glusterd_op_t op, dict_t *op_ctx,
struct syncargs args = {0};
this = THIS;
- synctask_barrier_init((&args));
+ GF_VALIDATE_OR_GOTO("glusterd", this, out);
+
+ ret = synctask_barrier_init((&args));
+ if (ret)
+ goto out;
+
peer_cnt = 0;
- rcu_read_lock();
+ RCU_READ_LOCK;
cds_list_for_each_entry_rcu(peerinfo, &conf->peers, uuid_list)
{
/* Only send requests to peers who were available before the
@@ -1209,7 +1242,7 @@ gd_lock_op_phase(glusterd_conf_t *conf, glusterd_op_t op, dict_t *op_ctx,
peer_uuid, txn_id);
peer_cnt++;
}
- rcu_read_unlock();
+ RCU_READ_UNLOCK;
if (0 == peer_cnt) {
ret = 0;
@@ -1266,8 +1299,10 @@ gd_stage_op_phase(glusterd_op_t op, dict_t *op_ctx, dict_t *req_dict,
GF_ASSERT(conf);
rsp_dict = dict_new();
- if (!rsp_dict)
+ if (!rsp_dict) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL);
goto out;
+ }
if ((op == GD_OP_CREATE_VOLUME) || (op == GD_OP_ADD_BRICK) ||
(op == GD_OP_START_VOLUME))
@@ -1312,10 +1347,13 @@ stage_done:
}
gd_syncargs_init(&args, aggr_dict);
- synctask_barrier_init((&args));
+ ret = synctask_barrier_init((&args));
+ if (ret)
+ goto out;
+
peer_cnt = 0;
- rcu_read_lock();
+ RCU_READ_LOCK;
cds_list_for_each_entry_rcu(peerinfo, &conf->peers, uuid_list)
{
/* Only send requests to peers who were available before the
@@ -1334,7 +1372,7 @@ stage_done:
req_dict, op_ctx);
peer_cnt++;
}
- rcu_read_unlock();
+ RCU_READ_UNLOCK;
if (0 == peer_cnt) {
ret = 0;
@@ -1383,6 +1421,8 @@ gd_commit_op_phase(glusterd_op_t op, dict_t *op_ctx, dict_t *req_dict,
char *errstr = NULL;
struct syncargs args = {0};
int type = GF_QUOTA_OPTION_TYPE_NONE;
+ uint32_t cmd = 0;
+ gf_boolean_t origin_glusterd = _gf_false;
this = THIS;
GF_ASSERT(this);
@@ -1391,6 +1431,7 @@ gd_commit_op_phase(glusterd_op_t op, dict_t *op_ctx, dict_t *req_dict,
rsp_dict = dict_new();
if (!rsp_dict) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL);
ret = -1;
goto out;
}
@@ -1438,10 +1479,30 @@ commit_done:
}
gd_syncargs_init(&args, op_ctx);
- synctask_barrier_init((&args));
+ ret = synctask_barrier_init((&args));
+ if (ret)
+ goto out;
+
peer_cnt = 0;
+ origin_glusterd = is_origin_glusterd(req_dict);
+
+ if (op == GD_OP_STATUS_VOLUME) {
+ ret = dict_get_uint32(req_dict, "cmd", &cmd);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
+ "Key=cmd", NULL);
+ goto out;
+ }
+
+ if (origin_glusterd) {
+ if ((cmd & GF_CLI_STATUS_ALL)) {
+ ret = 0;
+ goto out;
+ }
+ }
+ }
- rcu_read_lock();
+ RCU_READ_LOCK;
cds_list_for_each_entry_rcu(peerinfo, &conf->peers, uuid_list)
{
/* Only send requests to peers who were available before the
@@ -1460,7 +1521,7 @@ commit_done:
req_dict, op_ctx);
peer_cnt++;
}
- rcu_read_unlock();
+ RCU_READ_UNLOCK;
if (0 == peer_cnt) {
ret = 0;
@@ -1516,11 +1577,14 @@ gd_unlock_op_phase(glusterd_conf_t *conf, glusterd_op_t op, int *op_ret,
goto out;
}
- synctask_barrier_init((&args));
+ ret = synctask_barrier_init((&args));
+ if (ret)
+ goto out;
+
peer_cnt = 0;
if (cluster_lock) {
- rcu_read_lock();
+ RCU_READ_LOCK;
cds_list_for_each_entry_rcu(peerinfo, &conf->peers, uuid_list)
{
/* Only send requests to peers who were available before
@@ -1541,7 +1605,7 @@ gd_unlock_op_phase(glusterd_conf_t *conf, glusterd_op_t op, int *op_ret,
peer_cnt++;
}
}
- rcu_read_unlock();
+ RCU_READ_UNLOCK;
} else {
ret = dict_get_int32(op_ctx, "hold_global_locks", &global);
if (!ret && global)
@@ -1549,7 +1613,7 @@ gd_unlock_op_phase(glusterd_conf_t *conf, glusterd_op_t op, int *op_ret,
else
type = "vol";
if (volname || global) {
- rcu_read_lock();
+ RCU_READ_LOCK;
cds_list_for_each_entry_rcu(peerinfo, &conf->peers, uuid_list)
{
/* Only send requests to peers who were
@@ -1568,7 +1632,7 @@ gd_unlock_op_phase(glusterd_conf_t *conf, glusterd_op_t op, int *op_ret,
tmp_uuid, txn_id);
peer_cnt++;
}
- rcu_read_unlock();
+ RCU_READ_UNLOCK;
}
}
@@ -1644,6 +1708,7 @@ gd_brick_op_phase(glusterd_op_t op, dict_t *op_ctx, dict_t *req_dict,
char **op_errstr)
{
glusterd_pending_node_t *pending_node = NULL;
+ glusterd_pending_node_t *tmp = NULL;
struct cds_list_head selected = {
0,
};
@@ -1653,10 +1718,12 @@ gd_brick_op_phase(glusterd_op_t op, dict_t *op_ctx, dict_t *req_dict,
rpc_clnt_t *rpc = NULL;
dict_t *rsp_dict = NULL;
int32_t cmd = GF_OP_CMD_NONE;
+ glusterd_volinfo_t *volinfo = NULL;
this = THIS;
rsp_dict = dict_new();
if (!rsp_dict) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL);
ret = -1;
goto out;
}
@@ -1681,40 +1748,33 @@ gd_brick_op_phase(glusterd_op_t op, dict_t *op_ctx, dict_t *req_dict,
rsp_dict = NULL;
brick_count = 0;
- cds_list_for_each_entry(pending_node, &selected, list)
+ cds_list_for_each_entry_safe(pending_node, tmp, &selected, list)
{
rpc = glusterd_pending_node_get_rpc(pending_node);
+ /* In the case of rebalance if the rpc object is null, we try to
+ * create the rpc object. if the rebalance daemon is down, it returns
+ * -1. otherwise, rpc object will be created and referenced.
+ */
if (!rpc) {
- if (pending_node->type == GD_NODE_REBALANCE) {
- ret = 0;
- glusterd_defrag_volume_node_rsp(req_dict, NULL, op_ctx);
+ if (pending_node->type == GD_NODE_REBALANCE && pending_node->node) {
+ volinfo = pending_node->node;
+ ret = glusterd_rebalance_rpc_create(volinfo);
+ if (ret) {
+ ret = 0;
+ glusterd_defrag_volume_node_rsp(req_dict, NULL, op_ctx);
+ goto out;
+ } else {
+ rpc = glusterd_defrag_rpc_get(volinfo->rebal.defrag);
+ }
+ } else {
+ ret = -1;
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_RPC_FAILURE,
+ "Brick Op failed "
+ "due to rpc failure.");
goto out;
}
-
- ret = -1;
- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_RPC_FAILURE,
- "Brick Op failed "
- "due to rpc failure.");
- goto out;
}
- /* Redirect operation to be detach tier via rebalance flow. */
- ret = dict_get_int32(req_dict, "command", &cmd);
- if (!ret) {
- if (cmd == GF_OP_CMD_DETACH_START) {
- /* this change is left to support backward
- * compatibility. */
- op = GD_OP_REBALANCE;
- ret = dict_set_int32(req_dict, "rebalance-command",
- GF_DEFRAG_CMD_START_DETACH_TIER);
- } else if (cmd == GF_DEFRAG_CMD_DETACH_START) {
- op = GD_OP_REMOVE_TIER_BRICK;
- ret = dict_set_int32(req_dict, "rebalance-command",
- GF_DEFRAG_CMD_DETACH_START);
- }
- if (ret)
- goto out;
- }
ret = gd_syncop_mgmt_brick_op(rpc, pending_node, op, req_dict, op_ctx,
op_errstr);
if (op == GD_OP_STATUS_VOLUME) {
@@ -1726,24 +1786,19 @@ gd_brick_op_phase(glusterd_op_t op, dict_t *op_ctx, dict_t *req_dict,
if (dict_get(op_ctx, "client-count"))
break;
}
- } else if (cmd == GF_OP_CMD_DETACH_START) {
- op = GD_OP_REMOVE_BRICK;
- dict_del(req_dict, "rebalance-command");
- } else if (cmd == GF_DEFRAG_CMD_DETACH_START) {
- op = GD_OP_REMOVE_TIER_BRICK;
- dict_del(req_dict, "rebalance-command");
}
if (ret)
goto out;
brick_count++;
glusterd_pending_node_put_rpc(pending_node);
+ GF_FREE(pending_node);
}
pending_node = NULL;
ret = 0;
out:
- if (pending_node)
+ if (pending_node && pending_node->node)
glusterd_pending_node_put_rpc(pending_node);
if (rsp_dict)
diff --git a/xlators/mgmt/glusterd/src/glusterd-syncop.h b/xlators/mgmt/glusterd/src/glusterd-syncop.h
index 37195ef0112..a265f2135c6 100644
--- a/xlators/mgmt/glusterd/src/glusterd-syncop.h
+++ b/xlators/mgmt/glusterd/src/glusterd-syncop.h
@@ -10,7 +10,7 @@
#ifndef __RPC_SYNCOP_H
#define __RPC_SYNCOP_H
-#include "syncop.h"
+#include <glusterfs/syncop.h>
#include "glusterd-sm.h"
#include "glusterd.h"
@@ -32,7 +32,7 @@
ret = gd_syncop_submit_request(rpc, req, stb, cookie, prog, procnum, \
cbk, (xdrproc_t)xdrproc); \
if (!ret) \
- synctask_yield(stb->task); \
+ synctask_yield(stb->task, NULL); \
else \
gf_asprintf(&stb->errstr, \
"%s failed. Check log file" \
diff --git a/xlators/mgmt/glusterd/src/glusterd-tier.c b/xlators/mgmt/glusterd/src/glusterd-tier.c
deleted file mode 100644
index 9cc1b4a225e..00000000000
--- a/xlators/mgmt/glusterd/src/glusterd-tier.c
+++ /dev/null
@@ -1,1378 +0,0 @@
-/*
- Copyright (c) 2016 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
- */
-
-#include "common-utils.h"
-#include "cli1-xdr.h"
-#include "xdr-generic.h"
-#include "glusterd.h"
-#include "glusterd-op-sm.h"
-#include "glusterd-store.h"
-#include "glusterd-geo-rep.h"
-#include "glusterd-utils.h"
-#include "glusterd-volgen.h"
-#include "run.h"
-#include "syscall.h"
-#include "byte-order.h"
-#include "glusterd-svc-helper.h"
-#include "compat-errno.h"
-#include "glusterd-tierd-svc.h"
-#include "glusterd-tierd-svc-helper.h"
-#include "glusterd-messages.h"
-#include "glusterd-mgmt.h"
-#include "glusterd-syncop.h"
-
-#include <sys/wait.h>
-#include <dlfcn.h>
-
-extern struct rpc_clnt_program gd_brick_prog;
-
-const char *gd_tier_op_list[GF_DEFRAG_CMD_TYPE_MAX] = {
- [GF_DEFRAG_CMD_START_TIER] = "start",
- [GF_DEFRAG_CMD_STOP_TIER] = "stop",
-};
-
-int
-__glusterd_handle_tier(rpcsvc_request_t *req)
-{
- int32_t ret = -1;
- gf_cli_req cli_req = {{
- 0,
- }};
- dict_t *dict = NULL;
- glusterd_op_t cli_op = GD_OP_TIER_START_STOP;
- char *volname = NULL;
- int32_t cmd = 0;
- char msg[2048] = {
- 0,
- };
- xlator_t *this = NULL;
- glusterd_conf_t *conf = NULL;
- glusterd_volinfo_t *volinfo = NULL;
- char err_str[2048] = {0};
-
- this = THIS;
- GF_VALIDATE_OR_GOTO(THIS->name, this, out);
- GF_VALIDATE_OR_GOTO(this->name, req, out);
-
- conf = this->private;
- GF_VALIDATE_OR_GOTO(this->name, conf, out);
-
- ret = xdr_to_generic(req->msg[0], &cli_req, (xdrproc_t)xdr_gf_cli_req);
- if (ret < 0) {
- req->rpc_err = GARBAGE_ARGS;
- goto out;
- }
-
- if (cli_req.dict.dict_len) {
- /* Unserialize the dictionary */
- dict = dict_new();
-
- ret = dict_unserialize(cli_req.dict.dict_val, cli_req.dict.dict_len,
- &dict);
- if (ret < 0) {
- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_UNSERIALIZE_FAIL,
- "failed to "
- "unserialize req-buffer to dictionary");
- snprintf(msg, sizeof(msg),
- "Unable to decode the "
- "command");
- goto out;
- } else {
- dict->extra_stdfree = cli_req.dict.dict_val;
- }
- }
-
- ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname);
- if (ret) {
- snprintf(msg, sizeof(msg), "Unable to get volume name");
- gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
- "Unable to get volume name, "
- "while handling tier command");
- goto out;
- }
-
- ret = dict_get_int32n(dict, "rebalance-command", SLEN("rebalance-command"),
- &cmd);
- if (ret) {
- snprintf(msg, sizeof(msg), "Unable to get the command");
- gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
- "Unable to get the cmd");
- goto out;
- }
-
- if (conf->op_version < GD_OP_VERSION_3_7_0) {
- snprintf(msg, sizeof(msg),
- "Cannot execute command. The "
- "cluster is operating at version %d. Tier command "
- "%s is unavailable in this version",
- conf->op_version, gd_tier_op_list[cmd]);
- ret = -1;
- goto out;
- }
-
- if (conf->op_version < GD_OP_VERSION_3_10_0) {
- gf_msg_debug(this->name, 0,
- "The cluster is operating at "
- "version less than or equal to %d. Falling back "
- "to syncop framework.",
- GD_OP_VERSION_3_7_5);
- switch (cmd) {
- case GF_DEFRAG_CMD_DETACH_STOP:
- ret = dict_set_int32n(dict, "rebalance-command",
- SLEN("rebalance-command"),
- GF_DEFRAG_CMD_STOP_DETACH_TIER);
- break;
-
- case GF_DEFRAG_CMD_DETACH_COMMIT:
- ret = glusterd_volinfo_find(volname, &volinfo);
- if (ret) {
- snprintf(err_str, sizeof(err_str),
- "Volume "
- "%s does not exist",
- volname);
- gf_msg(this->name, GF_LOG_ERROR, EINVAL,
- GD_MSG_VOL_NOT_FOUND, "%s", err_str);
- goto out;
- }
- ret = glusterd_set_detach_bricks(dict, volinfo);
- ret = dict_set_int32n(dict, "command", SLEN("command"),
- GF_OP_CMD_DETACH_COMMIT);
- break;
- case GF_DEFRAG_CMD_DETACH_COMMIT_FORCE:
- ret = glusterd_volinfo_find(volname, &volinfo);
- if (ret) {
- snprintf(err_str, sizeof(err_str),
- "Volume "
- "%s does not exist",
- volname);
- gf_msg(this->name, GF_LOG_ERROR, EINVAL,
- GD_MSG_VOL_NOT_FOUND, "%s", err_str);
- goto out;
- }
- ret = glusterd_set_detach_bricks(dict, volinfo);
- ret = dict_set_int32n(dict, "command", SLEN("command"),
- GF_OP_CMD_DETACH_COMMIT_FORCE);
- break;
- case GF_DEFRAG_CMD_DETACH_START:
- ret = glusterd_volinfo_find(volname, &volinfo);
- if (ret) {
- snprintf(err_str, sizeof(err_str),
- "Volume "
- "%s does not exist",
- volname);
- gf_msg(this->name, GF_LOG_ERROR, EINVAL,
- GD_MSG_VOL_NOT_FOUND, "%s", err_str);
- goto out;
- }
- ret = glusterd_set_detach_bricks(dict, volinfo);
- ret = dict_set_int32n(dict, "command", SLEN("command"),
- GF_OP_CMD_DETACH_START);
- break;
-
- default:
- break;
- }
- if (ret) {
- gf_log(this->name, GF_LOG_ERROR, "Failed to set dict");
- goto out;
- }
- if ((cmd == GF_DEFRAG_CMD_STATUS_TIER) ||
- (cmd == GF_DEFRAG_CMD_DETACH_STATUS) ||
- (cmd == GF_DEFRAG_CMD_START_TIER) ||
- (cmd == GF_DEFRAG_CMD_DETACH_STOP)) {
- ret = glusterd_op_begin(req, GD_OP_DEFRAG_BRICK_VOLUME, dict, msg,
- sizeof(msg));
- } else
- ret = glusterd_op_begin(req, GD_OP_REMOVE_BRICK, dict, msg,
- sizeof(msg));
-
- glusterd_friend_sm();
- glusterd_op_sm();
-
- } else {
- switch (cmd) {
- case GF_DEFRAG_CMD_STATUS_TIER:
- cli_op = GD_OP_TIER_STATUS;
- break;
-
- case GF_DEFRAG_CMD_DETACH_STATUS:
- cli_op = GD_OP_DETACH_TIER_STATUS;
- break;
-
- case GF_DEFRAG_CMD_DETACH_STOP:
- cli_op = GD_OP_REMOVE_TIER_BRICK;
- break;
-
- case GF_DEFRAG_CMD_DETACH_COMMIT:
- case GF_DEFRAG_CMD_DETACH_COMMIT_FORCE:
- case GF_DEFRAG_CMD_DETACH_START:
- cli_op = GD_OP_REMOVE_TIER_BRICK;
- ret = glusterd_volinfo_find(volname, &volinfo);
- if (ret) {
- snprintf(err_str, sizeof(err_str),
- "Volume "
- "%s does not exist",
- volname);
- gf_msg(this->name, GF_LOG_ERROR, EINVAL,
- GD_MSG_VOL_NOT_FOUND, "%s", err_str);
- goto out;
- }
- ret = glusterd_set_detach_bricks(dict, volinfo);
- break;
-
- default:
- break;
- }
- if (ret < 0) {
- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
- "dict set failed");
- goto out;
- }
- ret = glusterd_mgmt_v3_initiate_all_phases(req, cli_op, dict);
- }
-
-out:
- if (ret) {
- if (msg[0] == '\0')
- snprintf(msg, sizeof(msg), "Tier operation failed");
- ret = glusterd_op_send_cli_response(cli_op, ret, 0, req, dict, msg);
- }
-
- return ret;
-}
-
-int
-glusterd_handle_tier(rpcsvc_request_t *req)
-{
- return glusterd_big_locked_handler(req, __glusterd_handle_tier);
-}
-
-int
-glusterd_op_remove_tier_brick(dict_t *dict, char **op_errstr, dict_t *rsp_dict)
-{
- glusterd_conf_t *priv = NULL;
- xlator_t *this = NULL;
- int ret = -1;
- char *volname = NULL;
- glusterd_volinfo_t *volinfo = NULL;
- char *brick = NULL;
- int32_t count = 0;
- int32_t i = 1;
- char key[256] = {
- 0,
- };
- int keylen;
- int32_t flag = 0;
- char err_str[4096] = {
- 0,
- };
- int need_rebalance = 0;
- int force = 0;
- int32_t cmd = 0;
- int32_t replica_count = 0;
- glusterd_brickinfo_t *brickinfo = NULL;
- glusterd_brickinfo_t *tmp = NULL;
- char *task_id_str = NULL;
- dict_t *bricks_dict = NULL;
- char *brick_tmpstr = NULL;
- uint32_t commit_hash = 0;
- int detach_commit = 0;
- void *tier_info = NULL;
- char *cold_shd_key = NULL;
- char *hot_shd_key = NULL;
- int delete_key = 1;
- glusterd_svc_t *svc = NULL;
-
- this = THIS;
- GF_VALIDATE_OR_GOTO(THIS->name, this, out);
- GF_VALIDATE_OR_GOTO(this->name, dict, out);
- GF_VALIDATE_OR_GOTO(this->name, op_errstr, out);
-
- priv = this->private;
- GF_VALIDATE_OR_GOTO(this->name, priv, out);
-
- ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
- "Unable to get volume name");
- goto out;
- }
-
- ret = glusterd_volinfo_find(volname, &volinfo);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_VOL_NOT_FOUND,
- "Unable to get volinfo");
- goto out;
- }
-
- ret = dict_get_int32n(dict, "rebalance-command", SLEN("rebalance-command"),
- &cmd);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
- "cmd not found");
- goto out;
- }
-
- if (is_origin_glusterd(dict) && (cmd != GF_DEFRAG_CMD_DETACH_START)) {
- if (!gf_uuid_is_null(volinfo->rebal.rebalance_id)) {
- ret = glusterd_copy_uuid_to_dict(volinfo->rebal.rebalance_id, dict,
- GF_REMOVE_BRICK_TID_KEY,
- SLEN(GF_REMOVE_BRICK_TID_KEY));
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0,
- GD_MSG_REMOVE_BRICK_ID_SET_FAIL,
- "Failed to set remove-brick-id");
- goto out;
- }
- }
- }
- /*check only if a tierd is supposed to be running
- * if no brick in the tierd volume is a local brick
- * skip it */
- cds_list_for_each_entry(brickinfo, &volinfo->bricks, brick_list)
- {
- if (glusterd_is_local_brick(this, volinfo, brickinfo)) {
- flag = _gf_true;
- break;
- }
- }
- if (!flag)
- goto out;
-
- ret = -1;
-
- switch (cmd) {
- case GF_DEFRAG_CMD_DETACH_STOP:
- /* Fall back to the old volume file */
- cds_list_for_each_entry_safe(brickinfo, tmp, &volinfo->bricks,
- brick_list)
- {
- if (!brickinfo->decommissioned)
- continue;
- brickinfo->decommissioned = 0;
- }
- volinfo->tier.op = GD_OP_DETACH_NOT_STARTED;
- ret = volinfo->tierd.svc.manager(&(volinfo->tierd.svc), volinfo,
- PROC_START_NO_WAIT);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0,
- GD_MSG_MANAGER_FUNCTION_FAILED,
- "Calling manager for tier "
- "failed on volume: %s for "
- "detach stop",
- volinfo->volname);
- goto out;
- }
-
- ret = glusterd_create_volfiles_and_notify_services(volinfo);
-
- if (ret) {
- gf_msg(this->name, GF_LOG_WARNING, 0,
- GD_MSG_VOLFILE_CREATE_FAIL, "failed to create volfiles");
- goto out;
- }
-
- ret = glusterd_store_volinfo(volinfo,
- GLUSTERD_VOLINFO_VER_AC_INCREMENT);
- if (ret) {
- gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_VOLINFO_SET_FAIL,
- "failed to store volinfo");
- goto out;
- }
- ret = 0;
- goto out;
-
- case GF_DEFRAG_CMD_DETACH_START:
- volinfo->tier.op = GD_OP_DETACH_TIER;
- svc = &(volinfo->tierd.svc);
- ret = svc->manager(svc, volinfo, PROC_START_NO_WAIT);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0,
- GD_MSG_MANAGER_FUNCTION_FAILED,
- "calling manager for tier "
- "failed on volume: %s for "
- "detach start",
- volname);
- goto out;
- }
- ret = dict_get_strn(dict, GF_REMOVE_BRICK_TID_KEY,
- SLEN(GF_REMOVE_BRICK_TID_KEY), &task_id_str);
- if (ret) {
- gf_msg_debug(this->name, errno, "Missing remove-brick-id");
- ret = 0;
- } else {
- ret = dict_set_strn(rsp_dict, GF_REMOVE_BRICK_TID_KEY,
- SLEN(GF_REMOVE_BRICK_TID_KEY), task_id_str);
- if (ret) {
- gf_msg(this->name, GF_LOG_WARNING, 0,
- GD_MSG_DICT_SET_FAILED,
- "failed to set remove-brick-id"
- "in the dict");
- }
- gf_uuid_parse(task_id_str, volinfo->tier.rebalance_id);
- }
- force = 0;
-
- break;
-
- case GF_DEFRAG_CMD_DETACH_COMMIT:
- if (volinfo->decommission_in_progress) {
- gf_asprintf(op_errstr,
- "use 'force' option as "
- "migration is in progress");
- goto out;
- }
- if (volinfo->rebal.defrag_status == GF_DEFRAG_STATUS_FAILED) {
- gf_asprintf(op_errstr,
- "use 'force' option as "
- "migration has failed");
- goto out;
- }
- /* Fall through */
-
- case GF_DEFRAG_CMD_DETACH_COMMIT_FORCE:
- if (cmd == GF_DEFRAG_CMD_DETACH_COMMIT_FORCE) {
- svc = &(volinfo->tierd.svc);
- ret = svc->manager(svc, volinfo, PROC_START_NO_WAIT);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0,
- GD_MSG_MANAGER_FUNCTION_FAILED,
- "calling manager for tier "
- "failed on volume: %s for "
- "commit force",
- volname);
- goto out;
- }
- }
- glusterd_op_perform_detach_tier(volinfo);
- detach_commit = 1;
-
- /* Disabling ctr when detaching a tier, since
- * currently tier is the only consumer of ctr.
- * Revisit this code when this constraint no
- * longer exist.
- */
- dict_deln(volinfo->dict, "features.ctr-enabled",
- SLEN("features.ctr-enabled"));
- dict_deln(volinfo->dict, "cluster.tier-mode",
- SLEN("cluster.tier-mode"));
-
- hot_shd_key = gd_get_shd_key(volinfo->tier_info.hot_type);
- cold_shd_key = gd_get_shd_key(volinfo->tier_info.cold_type);
- if (hot_shd_key) {
- /*
- * Since post detach, shd graph will not
- * contain hot tier. So we need to clear
- * option set for hot tier. For a tiered
- * volume there can be different key
- * for both hot and cold. If hot tier is
- * shd compatible then we need to remove
- * the configured value when detaching a tier,
- * only if the key's are different or
- * cold key is NULL. So we will set
- * delete_key first, and if cold key is not
- * null and they are equal then we will clear
- * the flag. Otherwise we will delete the
- * key.
- */
-
- if (cold_shd_key)
- delete_key = strcmp(hot_shd_key, cold_shd_key);
- if (delete_key)
- dict_del(volinfo->dict, hot_shd_key);
- }
- /* fall through */
-
- if (volinfo->decommission_in_progress) {
- if (volinfo->tier.defrag) {
- LOCK(&volinfo->rebal.defrag->lock);
- /* Fake 'rebalance-complete' so the
- * graph change
- * happens right away */
- volinfo->tier.defrag_status = GF_DEFRAG_STATUS_COMPLETE;
-
- UNLOCK(&volinfo->tier.defrag->lock);
- }
- }
-
- volinfo->tier.op = GD_OP_DETACH_NOT_STARTED;
- ret = 0;
- force = 1;
- break;
- default:
- gf_asprintf(op_errstr,
- "tier command failed. Invalid "
- "opcode");
- ret = -1;
- goto out;
- }
-
- count = glusterd_set_detach_bricks(dict, volinfo);
-
- if (cmd == GF_DEFRAG_CMD_DETACH_START) {
- bricks_dict = dict_new();
- if (!bricks_dict) {
- ret = -1;
- goto out;
- }
- ret = dict_set_int32n(bricks_dict, "count", SLEN("count"), count);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
- "Failed to save remove-brick count");
- goto out;
- }
- }
-
- while (i <= count) {
- keylen = snprintf(key, sizeof(key), "brick%d", i);
- ret = dict_get_strn(dict, key, keylen, &brick);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
- "Unable to get %s", key);
- goto out;
- }
-
- if (cmd == GF_DEFRAG_CMD_DETACH_START) {
- brick_tmpstr = gf_strdup(brick);
- if (!brick_tmpstr) {
- ret = -1;
- gf_msg(this->name, GF_LOG_ERROR, ENOMEM, GD_MSG_NO_MEMORY,
- "Failed to duplicate brick name");
- goto out;
- }
- ret = dict_set_dynstrn(bricks_dict, key, keylen, brick_tmpstr);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
- "Failed to add brick to dict");
- goto out;
- }
- brick_tmpstr = NULL;
- }
-
- ret = glusterd_op_perform_remove_brick(volinfo, brick, force,
- &need_rebalance);
- if (ret)
- goto out;
- i++;
- }
-
- if (detach_commit) {
- /* Clear related information from volinfo */
- tier_info = ((void *)(&volinfo->tier_info));
- memset(tier_info, 0, sizeof(volinfo->tier_info));
- }
-
- if (cmd == GF_DEFRAG_CMD_DETACH_START)
- volinfo->tier.dict = dict_ref(bricks_dict);
-
- ret = dict_get_int32n(dict, "replica-count", SLEN("replica-count"),
- &replica_count);
- if (!ret) {
- gf_msg(this->name, GF_LOG_INFO, errno, GD_MSG_DICT_GET_FAILED,
- "changing replica count %d to %d on volume %s",
- volinfo->replica_count, replica_count, volinfo->volname);
- volinfo->replica_count = replica_count;
- volinfo->sub_count = replica_count;
- volinfo->dist_leaf_count = glusterd_get_dist_leaf_count(volinfo);
-
- /*
- * volinfo->type and sub_count have already been set for
- * volumes undergoing a detach operation, they should not
- * be modified here.
- */
- if ((replica_count == 1) && (cmd != GF_DEFRAG_CMD_DETACH_COMMIT) &&
- (cmd != GF_DEFRAG_CMD_DETACH_COMMIT_FORCE)) {
- if (volinfo->type == GF_CLUSTER_TYPE_REPLICATE) {
- volinfo->type = GF_CLUSTER_TYPE_NONE;
- /* backward compatibility */
- volinfo->sub_count = 0;
- }
- }
- }
- volinfo->subvol_count = (volinfo->brick_count / volinfo->dist_leaf_count);
-
- ret = glusterd_create_volfiles_and_notify_services(volinfo);
- if (ret) {
- gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_VOLFILE_CREATE_FAIL,
- "failed to create"
- "volfiles");
- goto out;
- }
-
- ret = glusterd_store_volinfo(volinfo, GLUSTERD_VOLINFO_VER_AC_INCREMENT);
- if (ret) {
- gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_VOLINFO_STORE_FAIL,
- "failed to store volinfo");
- goto out;
- }
-
- if (cmd == GF_DEFRAG_CMD_DETACH_START &&
- volinfo->status == GLUSTERD_STATUS_STARTED) {
- ret = glusterd_svcs_reconfigure();
- if (ret) {
- gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_NFS_RECONF_FAIL,
- "Unable to reconfigure NFS-Server");
- goto out;
- }
- }
- /* Need to reset the defrag/rebalance status accordingly */
- switch (volinfo->tier.defrag_status) {
- case GF_DEFRAG_STATUS_FAILED:
- case GF_DEFRAG_STATUS_COMPLETE:
- volinfo->tier.defrag_status = 0;
- /* FALLTHROUGH */
- default:
- break;
- }
- if (!force && need_rebalance) {
- if (dict_get_uint32(dict, "commit-hash", &commit_hash) == 0) {
- volinfo->tier.commit_hash = commit_hash;
- }
- /* perform the rebalance operations */
- ret = glusterd_handle_defrag_start(
- volinfo, err_str, sizeof(err_str), GF_DEFRAG_CMD_START_DETACH_TIER,
- /*change this label to GF_DEFRAG_CMD_DETACH_START
- * while removing old code
- */
- glusterd_remove_brick_migrate_cbk, GD_OP_REMOVE_BRICK);
-
- if (!ret)
- volinfo->decommission_in_progress = 1;
-
- else if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_REBALANCE_START_FAIL,
- "failed to start the rebalance");
- }
- } else {
- if (GLUSTERD_STATUS_STARTED == volinfo->status)
- ret = glusterd_svcs_manager(volinfo);
- }
-
-out:
- if (ret && err_str[0] && op_errstr)
- *op_errstr = gf_strdup(err_str);
-
- GF_FREE(brick_tmpstr);
- if (bricks_dict)
- dict_unref(bricks_dict);
-
- return ret;
-}
-
-int
-glusterd_op_tier_start_stop(dict_t *dict, char **op_errstr, dict_t *rsp_dict)
-{
- glusterd_volinfo_t *volinfo = NULL;
- int32_t ret = -1;
- char *volname = NULL;
- int cmd = -1;
- xlator_t *this = NULL;
- glusterd_brickinfo_t *brick = NULL;
- gf_boolean_t retval = _gf_false;
- glusterd_conf_t *priv = NULL;
- int32_t pid = -1;
- char pidfile[PATH_MAX] = {0};
- int is_force = 0;
-
- this = THIS;
- GF_VALIDATE_OR_GOTO(THIS->name, this, out);
- GF_VALIDATE_OR_GOTO(this->name, dict, out);
- GF_VALIDATE_OR_GOTO(this->name, op_errstr, out);
-
- priv = this->private;
- GF_VALIDATE_OR_GOTO(this->name, priv, out);
-
- ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
- "Unable to get volume name");
- goto out;
- }
-
- ret = glusterd_volinfo_find(volname, &volinfo);
- if (ret) {
- gf_asprintf(op_errstr, FMTSTR_CHECK_VOL_EXISTS, volname);
- goto out;
- }
-
- ret = dict_get_int32n(dict, "rebalance-command", SLEN("rebalance-command"),
- &cmd);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
- "Unable to get cmd from "
- "dict");
- goto out;
- }
-
- cds_list_for_each_entry(brick, &volinfo->bricks, brick_list)
- {
- if (gf_uuid_compare(MY_UUID, brick->uuid) == 0) {
- retval = _gf_true;
- break;
- }
- }
- /*check if this node needs tierd*/
-
- if (!retval)
- goto out;
-
- if (glusterd_is_volume_started(volinfo) == 0) {
- *op_errstr = gf_strdup(
- "Volume is stopped, start "
- "volume to enable/disable tier.");
- ret = -1;
- goto out;
- }
-
- GLUSTERD_GET_TIER_PID_FILE(pidfile, volinfo, priv);
-
- switch (cmd) {
- case GF_DEFRAG_CMD_START_TIER:
- /* we check if its running and skip so that we don't get a
- * failure during force start
- */
- ret = dict_get_int32n(dict, "force", SLEN("force"), &is_force);
- if (ret) {
- gf_msg_debug(this->name, 0,
- "Unable to get is_force"
- " from dict");
- }
- ret = dict_set_int32n(volinfo->dict, "force", SLEN("force"),
- is_force);
- if (ret) {
- gf_msg_debug(this->name, errno,
- "Unable to set"
- " is_force to dict");
- }
-
- if (!is_force) {
- if (gf_is_service_running(pidfile, &pid)) {
- gf_asprintf(op_errstr,
- "Tier is already "
- "enabled on volume %s.",
- volinfo->volname);
- goto out;
- }
- }
-
- break;
-
- case GF_DEFRAG_CMD_STOP_TIER:
- if (!gf_is_service_running(pidfile, &pid)) {
- gf_asprintf(op_errstr,
- "Tier is alreaady disabled on "
- "volume %s.",
- volinfo->volname);
- goto out;
- }
- break;
- default:
- gf_asprintf(op_errstr,
- "tier command failed. Invalid "
- "opcode");
- ret = -1;
- goto out;
- }
-
- ret = volinfo->tierd.svc.manager(&(volinfo->tierd.svc), volinfo,
- PROC_START_NO_WAIT);
- if (ret)
- goto out;
-
- ret = glusterd_store_volinfo(volinfo, GLUSTERD_VOLINFO_VER_AC_INCREMENT);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLINFO_STORE_FAIL,
- "Failed to store volinfo for tier");
- goto out;
- }
-
-out:
- return ret;
-}
-
-int
-glusterd_op_stage_tier(dict_t *dict, char **op_errstr, dict_t *rsp_dict)
-{
- char *volname = NULL;
- int ret = -1;
- int32_t cmd = 0;
- char msg[2048] = {0};
- glusterd_volinfo_t *volinfo = NULL;
- char *task_id_str = NULL;
- xlator_t *this = 0;
- int32_t is_force = 0;
- char pidfile[PATH_MAX] = {0};
- int32_t tier_online = -1;
- int32_t pid = -1;
- int32_t brick_count = 0;
- gsync_status_param_t param = {
- 0,
- };
- glusterd_conf_t *priv = NULL;
- gf_boolean_t flag = _gf_false;
- glusterd_brickinfo_t *brickinfo = NULL;
-
- this = THIS;
- GF_VALIDATE_OR_GOTO(THIS->name, this, out);
- GF_VALIDATE_OR_GOTO(this->name, dict, out);
- GF_VALIDATE_OR_GOTO(this->name, op_errstr, out);
-
- priv = this->private;
- GF_VALIDATE_OR_GOTO(this->name, priv, out);
-
- ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
- "volname not found");
- goto out;
- }
-
- ret = dict_get_int32n(dict, "rebalance-command", SLEN("rebalance-command"),
- &cmd);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
- "cmd not found");
- goto out;
- }
-
- ret = glusterd_rebalance_cmd_validate(cmd, volname, &volinfo, msg,
- sizeof(msg));
- if (ret) {
- gf_msg_debug(this->name, 0, "cmd validate failed");
- goto out;
- }
-
- if (volinfo->type != GF_CLUSTER_TYPE_TIER) {
- snprintf(msg, sizeof(msg),
- "volume %s is not a tier "
- "volume",
- volinfo->volname);
- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOL_NOT_TIER,
- "volume: %s is not a tier "
- "volume",
- volinfo->volname);
- ret = -1;
- goto out;
- }
- /* Check if the connected clients are all of version
- * glusterfs-3.6 and higher. This is needed to prevent some data
- * loss issues that could occur when older clients are connected
- * when rebalance is run. This check can be bypassed by using
- * 'force'
- */
- ret = glusterd_check_client_op_version_support(volname, GD_OP_VERSION_3_6_0,
- NULL);
- if (ret) {
- ret = gf_asprintf(op_errstr,
- "Volume %s has one or "
- "more connected clients of a version"
- " lower than GlusterFS-v3.6.0. "
- "Tier operations not supported in"
- " below this version",
- volname);
- goto out;
- }
- /*check only if a tierd is supposed to be running
- * if no brick in the tierd volume is a local brick
- * skip it */
- cds_list_for_each_entry(brickinfo, &volinfo->bricks, brick_list)
- {
- if (glusterd_is_local_brick(this, volinfo, brickinfo)) {
- flag = _gf_true;
- break;
- }
- }
- if (!flag)
- goto out;
-
- GLUSTERD_GET_TIER_PID_FILE(pidfile, volinfo, priv);
- tier_online = gf_is_service_running(pidfile, &pid);
-
- switch (cmd) {
- case GF_DEFRAG_CMD_START_TIER:
- ret = dict_get_int32n(dict, "force", SLEN("force"), &is_force);
- if (ret)
- is_force = 0;
-
- if (brickinfo->status != GF_BRICK_STARTED) {
- gf_asprintf(op_errstr,
- "Received"
- " tier start on volume "
- "with stopped brick %s",
- brickinfo->path);
- ret = -1;
- goto out;
- }
- if ((!is_force) && tier_online) {
- ret = gf_asprintf(op_errstr,
- "Tier daemon is "
- "already running on volume %s",
- volname);
- ret = -1;
- goto out;
- }
- ret = glusterd_defrag_start_validate(volinfo, msg, sizeof(msg),
- GD_OP_REBALANCE);
- if (ret) {
- gf_msg(this->name, 0, GF_LOG_ERROR, GD_MSG_REBALANCE_START_FAIL,
- "start validate failed");
- goto out;
- }
- if (volinfo->tier.op == GD_OP_DETACH_TIER) {
- snprintf(msg, sizeof(msg),
- "A detach tier task "
- "exists for volume %s. Either commit it"
- " or stop it before starting a new task.",
- volinfo->volname);
- gf_msg(this->name, GF_LOG_ERROR, 0,
- GD_MSG_OLD_REMOVE_BRICK_EXISTS,
- "Earlier detach-tier"
- " task exists for volume %s.",
- volinfo->volname);
- ret = -1;
- goto out;
- }
- break;
-
- case GF_DEFRAG_CMD_STOP_TIER:
-
- if (!tier_online) {
- ret = gf_asprintf(op_errstr,
- "Tier daemon is "
- "not running on volume %s",
- volname);
- ret = -1;
- goto out;
- }
- break;
-
- case GF_DEFRAG_CMD_DETACH_START:
-
- ret = dict_get_int32n(dict, "count", SLEN("count"), &brick_count);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
- "Unable to get brick count");
- goto out;
- }
-
- if (!tier_online) {
- ret = gf_asprintf(op_errstr,
- "Tier daemon is "
- "not running on volume %s",
- volname);
- ret = -1;
- goto out;
- }
- if (volinfo->tier.op == GD_OP_DETACH_TIER) {
- snprintf(msg, sizeof(msg),
- "An earlier detach tier "
- "task exists for volume %s. Either commit it"
- " or stop it before starting a new task.",
- volinfo->volname);
- gf_msg(this->name, GF_LOG_ERROR, 0,
- GD_MSG_OLD_REMOVE_BRICK_EXISTS,
- "Earlier remove-brick"
- " task exists for volume %s.",
- volinfo->volname);
- ret = -1;
- goto out;
- }
- if (glusterd_is_defrag_on(volinfo)) {
- snprintf(msg, sizeof(msg),
- "Migration is in progress."
- " Please retry after completion");
- gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_OIP_RETRY_LATER,
- "Migration is"
- "in progress");
- goto out;
- }
-
- ret = glusterd_remove_brick_validate_bricks(
- GF_OP_CMD_NONE, brick_count, dict, volinfo, op_errstr, cmd);
- if (ret)
- goto out;
-
- if (is_origin_glusterd(dict)) {
- ret = glusterd_generate_and_set_task_id(
- dict, GF_REMOVE_BRICK_TID_KEY,
- SLEN(GF_REMOVE_BRICK_TID_KEY));
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_TASKID_GEN_FAIL,
- "Failed to generate task-id");
- goto out;
- }
- } else {
- ret = dict_get_strn(dict, GF_REMOVE_BRICK_TID_KEY,
- SLEN(GF_REMOVE_BRICK_TID_KEY),
- &task_id_str);
- if (ret) {
- gf_msg(this->name, GF_LOG_WARNING, errno,
- GD_MSG_DICT_GET_FAILED, "Missing remove-brick-id");
- ret = 0;
- }
- }
- break;
-
- case GF_DEFRAG_CMD_DETACH_STOP:
- if (volinfo->tier.op != GD_OP_DETACH_TIER) {
- snprintf(msg, sizeof(msg),
- "Detach-tier "
- "not started");
- ret = -1;
- goto out;
- }
- ret = 0;
- break;
-
- case GF_DEFRAG_CMD_STATUS_TIER:
-
- if (!tier_online) {
- ret = gf_asprintf(op_errstr,
- "Tier daemon is "
- "not running on volume %s",
- volname);
- ret = -1;
- goto out;
- }
- break;
-
- case GF_DEFRAG_CMD_DETACH_COMMIT:
-
- if (volinfo->tier.op != GD_OP_DETACH_TIER) {
- snprintf(msg, sizeof(msg),
- "Detach-tier "
- "not started");
- ret = -1;